diff --git a/.circleci/main.yml b/.circleci/main.yml index a13300a78d..91add3529f 100644 --- a/.circleci/main.yml +++ b/.circleci/main.yml @@ -68,7 +68,9 @@ commands: steps: - run: name: Getting Sample BIDS Data - command: git clone https://github.com/bids-standard/bids-examples.git + command: | + mkdir -p /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples + git clone https://github.com/bids-standard/bids-examples.git /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples get-singularity: parameters: version: @@ -156,7 +158,7 @@ commands: then TAG=nightly else - TAG="${CIRCLE_BRANCH//\//_}" + TAG=`echo ${CIRCLE_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` fi DOCKER_TAG="ghcr.io/${CIRCLE_PROJECT_USERNAME,,}/${CIRCLE_PROJECT_REPONAME,,}:${TAG,,}" if [[ -n "<< parameters.variant >>" ]] @@ -172,7 +174,7 @@ commands: name: Testing Singularity installation command: | pip install -r dev/circleci_data/requirements.txt - coverage run -m pytest --junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py + coverage run -m pytest --capture=no --junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py jobs: combine-coverage: diff --git a/.github/workflows/build_C-PAC.yml b/.github/workflows/build_C-PAC.yml index d126f6a778..ef7a196cef 100644 --- a/.github/workflows/build_C-PAC.yml +++ b/.github/workflows/build_C-PAC.yml @@ -42,7 +42,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` DOCKERFILE=.github/Dockerfiles/C-PAC.develop$VARIANT-$OS.Dockerfile elif [[ $GITHUB_BRANCH == 'develop' ]] then diff --git a/.github/workflows/regression_test_full.yml b/.github/workflows/regression_test_full.yml index 6dba2d1bf2..20d25a9316 100644 --- a/.github/workflows/regression_test_full.yml +++ b/.github/workflows/regression_test_full.yml @@ -13,7 +13,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly diff --git a/.github/workflows/regression_test_lite.yml b/.github/workflows/regression_test_lite.yml index 4e6b5a46f6..87aba8a5bd 100644 --- a/.github/workflows/regression_test_lite.yml +++ b/.github/workflows/regression_test_lite.yml @@ -37,7 +37,7 @@ jobs: run: | if [[ ! $GITHUB_REF_NAME == 'main' ]] && [[ ! $GITHUB_REF_NAME == 'develop' ]] then - TAG=${GITHUB_REF_NAME//\//_} + TAG=`echo ${GITHUB_REF_NAME} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_REF_NAME == 'develop' ]] then TAG=nightly diff --git a/.github/workflows/smoke_test_participant.yml b/.github/workflows/smoke_test_participant.yml index 3fde0de8aa..6b7e219775 100644 --- a/.github/workflows/smoke_test_participant.yml +++ b/.github/workflows/smoke_test_participant.yml @@ -68,7 +68,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly @@ -133,7 +133,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! 
$GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly @@ -192,7 +192,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly diff --git a/.ruff.toml b/.ruff.toml index 265427a1ab..590d3baa47 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -13,6 +13,7 @@ external = ["T20"] # Don't autoremove 'noqa` comments for these rules "CPAC/utils/sklearn.py" = ["RUF003"] "CPAC/utils/tests/old_functions.py" = ["C", "D", "E", "EM", "PLW", "RET"] "CPAC/utils/utils.py" = ["T201"] # until `repickle` is removed +"dev/circleci_data/conftest.py" = ["F401"] "setup.py" = ["D1"] [lint.flake8-import-conventions.extend-aliases] diff --git a/CHANGELOG.md b/CHANGELOG.md index df8f40a666..a8bb98da0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Moved `pygraphviz` from requirements to `graphviz` optional dependencies group. +- Split `ResourcePool` into three classes: `Resource`, `ResourcePool`, and `StratPool`. ### Fixed diff --git a/CPAC/alff/alff.py b/CPAC/alff/alff.py index f8bfc1a0b8..e26342ffb5 100644 --- a/CPAC/alff/alff.py +++ b/CPAC/alff/alff.py @@ -22,7 +22,7 @@ from CPAC.alff.utils import get_opt_string from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.utils.interfaces import Function from CPAC.utils.utils import check_prov_for_regtool diff --git a/CPAC/anat_preproc/anat_preproc.py b/CPAC/anat_preproc/anat_preproc.py index 0f4e770f97..5a6acd286e 100644 --- a/CPAC/anat_preproc/anat_preproc.py +++ b/CPAC/anat_preproc/anat_preproc.py @@ -34,7 +34,7 @@ wb_command, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge diff --git a/CPAC/conftest.py b/CPAC/conftest.py new file mode 100644 index 0000000000..52113ebd40 --- /dev/null +++ b/CPAC/conftest.py @@ -0,0 +1,32 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Global pytest configuration.""" + +from pathlib import Path + +import pytest + + +@pytest.fixture +def bids_examples(cache: pytest.Cache) -> Path: + """Get cached example BIDS directories.""" + bids_dir = cache.mkdir("bids-examples").absolute() + if not (bids_dir.exists() and list(bids_dir.iterdir())): + from git import Repo + + Repo.clone_from("https://github.com/bids-standard/bids-examples.git", bids_dir) + return bids_dir diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index a7f0eaefcc..5f0728b628 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -32,7 +32,7 @@ run_fsl_topup, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils import function from CPAC.utils.datasource import match_epi_fmaps from CPAC.utils.interfaces.function import Function @@ -438,11 +438,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None): node, out = strat_pool.get_data("pe-direction") wf.connect(node, out, match_epi_fmaps_node, "bold_pedir") - # interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), - # 'desc-brain_bold': 'opposite_pe_epi_brain'} - # wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], - # interface, wf, cfg, strat_pool, pipe_num, opt) - func_get_brain_mask = pe.Node( interface=preprocess.Automask(), name=f"afni_mask_opposite_pe_{pipe_num}" ) @@ -530,10 +525,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, undistort_func_mean, "reference_image") wf.connect(convert_afni_warp, "ants_warp", undistort_func_mean, "transforms") - # interface = {'desc-preproc_bold': (undistort_func_mean, 'output_image')} - # wf, strat_pool = wrap_block([bold_mask_afni], - # interface, wf, cfg, strat_pool, pipe_num, opt) - remask = pe.Node( interface=preprocess.Automask(), name=f"afni_remask_boldmask_{pipe_num}" ) @@ -764,7 +755,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(run_topup, "out_jacs", vnum_base, "jac_matrix_list") wf.connect(run_topup, "out_warps", vnum_base, "warp_field_list") - mean_bold = strat_pool.node_data("sbref") + mean_bold = strat_pool.get_data("sbref") flirt = pe.Node(interface=fsl.FLIRT(), name="flirt") flirt.inputs.dof = 6 diff --git a/CPAC/func_preproc/func_ingress.py b/CPAC/func_preproc/func_ingress.py index 60c8ccf5c9..2105503a19 100644 --- a/CPAC/func_preproc/func_ingress.py +++ b/CPAC/func_preproc/func_ingress.py @@ -14,12 +14,21 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
-from CPAC.utils.datasource import create_func_datasource, ingress_func_metadata +"""Ingress functional data for preprocessing.""" + +from CPAC.utils.strategy import Strategy def connect_func_ingress( - workflow, strat_list, c, sub_dict, subject_id, input_creds_path, unique_id=None + workflow, + strat_list: list[Strategy], + c, + sub_dict, + subject_id, + input_creds_path, + unique_id=None, ): + """Connect functional ingress workflow.""" for num_strat, strat in enumerate(strat_list): if "func" in sub_dict: func_paths_dict = sub_dict["func"] @@ -31,7 +40,9 @@ def connect_func_ingress( else: workflow_name = f"func_gather_{unique_id}_{num_strat}" - func_wf = create_func_datasource(func_paths_dict, workflow_name) + func_wf = strat._resource_pool.create_func_datasource( + func_paths_dict, workflow_name + ) func_wf.inputs.inputnode.set( subject=subject_id, @@ -47,8 +58,6 @@ def connect_func_ingress( } ) - (workflow, strat.rpool, diff, blip, fmap_rp_list) = ingress_func_metadata( - workflow, c, strat.rpool, sub_dict, subject_id, input_creds_path, unique_id - ) + diff, blip, fmap_rp_list = strat.rpool.ingress_func_metadata() - return (workflow, diff, blip, fmap_rp_list) + return strat.rpool.wf, diff, blip, fmap_rp_list diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py index bea7d2e29c..dfec8ab91c 100644 --- a/CPAC/func_preproc/func_motion.py +++ b/CPAC/func_preproc/func_motion.py @@ -31,7 +31,7 @@ motion_power_statistics, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.pipeline.schema import valid_options from CPAC.utils.interfaces.function import Function from CPAC.utils.utils import check_prov_for_motion_tool @@ -830,7 +830,7 @@ def motion_estimate_filter(wf, cfg, strat_pool, pipe_num, opt=None): notch.inputs.lowpass_cutoff = opt.get("lowpass_cutoff") notch.inputs.filter_order = opt.get("filter_order") - movement_parameters = strat_pool.node_data("desc-movementParameters_motion") + movement_parameters = strat_pool.get_data("desc-movementParameters_motion") wf.connect( movement_parameters.node, movement_parameters.out, notch, "motion_params" ) diff --git a/CPAC/func_preproc/func_preproc.py b/CPAC/func_preproc/func_preproc.py index 7004b4f025..69b856509a 100644 --- a/CPAC/func_preproc/func_preproc.py +++ b/CPAC/func_preproc/func_preproc.py @@ -22,7 +22,7 @@ from CPAC.func_preproc.utils import nullify from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.ants import ( AI, # niworkflows @@ -993,7 +993,7 @@ def bold_mask_fsl_afni(wf, cfg, strat_pool, pipe_num, opt=None): # and this function has been changed. 
# CHANGES: - # * Converted from a plain function to a CPAC.pipeline.nodeblock.NodeBlockFunction + # * Converted from a plain function to a CPAC.pipeline.engine.nodeblock.NodeBlockFunction # * Removed Registration version check # * Hardcoded Registration parameters instead of loading epi_atlasbased_brainmask.json # * Uses C-PAC's ``FSL-AFNI-brain-probseg`` template in place of ``templateflow.api.get("MNI152NLin2009cAsym", resolution=1, label="brain", suffix="probseg")`` diff --git a/CPAC/func_preproc/tests/test_preproc_connections.py b/CPAC/func_preproc/tests/test_preproc_connections.py index f58380a7fd..9b7da2ed4c 100644 --- a/CPAC/func_preproc/tests/test_preproc_connections.py +++ b/CPAC/func_preproc/tests/test_preproc_connections.py @@ -36,7 +36,6 @@ ) from CPAC.func_preproc.func_preproc import func_normalize from CPAC.nuisance.nuisance import choose_nuisance_blocks -from CPAC.pipeline.cpac_pipeline import connect_pipeline from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.nipype_pipeline_engine import Workflow from CPAC.registration.registration import ( @@ -81,7 +80,7 @@ "from-template_to-T1w_mode-image_desc-linear_xfm", ] -NUM_TESTS = 48 # number of parameterizations to run for many-parameter tests +NUM_TESTS = 8 # number of parameterizations to run for many-parameter tests def _filter_assertion_message( @@ -268,7 +267,7 @@ def test_motion_filter_connections( if not rpool.check_rpool("desc-cleaned_bold"): pipeline_blocks += choose_nuisance_blocks(c, generate_only) wf = Workflow(re.sub(r"[\[\]\-\:\_ \'\",]", "", str(rpool))) - connect_pipeline(wf, c, rpool, pipeline_blocks) + rpool.connect_pipeline(wf, c, pipeline_blocks) # Check that filtering is happening as expected filter_switch_key = [ "functional_preproc", diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py index 4229fc30c6..962d444a4e 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py +++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py @@ -21,17 +21,16 @@ import nipype.interfaces.io as nio from indi_aws import aws_utils +from CPAC.func_preproc.func_ingress import connect_func_ingress from CPAC.longitudinal_pipeline.longitudinal_preproc import subject_specific_template from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.cpac_pipeline import ( build_anat_preproc_stack, build_segmentation_stack, build_T1w_registration_stack, - connect_pipeline, - initialize_nipype_wf, ) -from CPAC.pipeline.engine import ingress_output_dir, initiate_rpool -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine import ResourcePool +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration import ( create_fsl_flirt_linear_reg, create_fsl_fnirt_nonlinear_reg, @@ -428,16 +427,13 @@ def anat_longitudinal_wf(subject_id, sub_list, config): except KeyError: input_creds_path = None - workflow = initialize_nipype_wf( - config, - sub_list[0], - # just grab the first one for the name - name="anat_longitudinal_pre-preproc", + rpool = ResourcePool( + cfg=config, + data_paths=session, + pipeline_name="anat_longitudinal_pre-preproc", ) - - workflow, rpool = initiate_rpool(workflow, config, session) pipeline_blocks = build_anat_preproc_stack(rpool, config) - workflow = connect_pipeline(workflow, config, rpool, pipeline_blocks) + workflow = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) session_wfs[unique_id] = rpool @@ -473,13 +469,6 @@ def anat_longitudinal_wf(subject_id, sub_list, config): ) for 
strat in strats_brain_dct.keys(): - wf = initialize_nipype_wf( - config, - sub_list[0], - # just grab the first one for the name - name=f"template_node_{strat}", - ) - config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}" template_node_name = f"longitudinal_anat_template_{strat}" @@ -507,9 +496,9 @@ def anat_longitudinal_wf(subject_id, sub_list, config): template_node.inputs.input_skull_list = strats_head_dct[strat] long_id = f"longitudinal_{subject_id}_strat-{strat}" - - wf, rpool = initiate_rpool(wf, config, part_id=long_id) - + rpool = ResourcePool( + cfg=config, part_id=long_id, pipeline_name=f"template_node_{strat}" + ) rpool.set_data( "space-longitudinal_desc-brain_T1w", template_node, @@ -552,7 +541,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): pipeline_blocks = build_segmentation_stack(rpool, config, pipeline_blocks) - wf = connect_pipeline(wf, config, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) excl = [ "space-longitudinal_desc-brain_T1w", @@ -574,7 +563,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): creds_path = session["creds_path"] if creds_path and "none" not in creds_path.lower(): if os.path.exists(creds_path): - input_creds_path = os.path.abspath(creds_path) + session["creds_path"] = os.path.abspath(creds_path) else: err_msg = ( 'Credentials path: "%s" for subject "%s" ' @@ -583,18 +572,14 @@ def anat_longitudinal_wf(subject_id, sub_list, config): ) raise Exception(err_msg) else: - input_creds_path = None + session["creds_path"] = None except KeyError: - input_creds_path = None - - wf = initialize_nipype_wf(config, sub_list[0]) - - wf, rpool = initiate_rpool(wf, config, session) + session["creds_path"] = None config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}" - rpool = ingress_output_dir( - config, rpool, long_id, creds_path=input_creds_path - ) + rpool = ResourcePool(cfg=config, data_paths=session) + wf = rpool.wf + rpool.ingress_output_dir() select_node_name = f"select_{unique_id}" select_sess = pe.Node( @@ -654,17 +639,14 @@ def anat_longitudinal_wf(subject_id, sub_list, config): input_creds_path = None except KeyError: input_creds_path = None - - wf = initialize_nipype_wf(config, sub_list[0]) - - wf, rpool = initiate_rpool(wf, config, session) - + session["creds_path"] = input_creds_path + rpool = ResourcePool(cfg=config, data_paths=session) pipeline_blocks = [ warp_longitudinal_T1w_to_template, warp_longitudinal_seg_to_T1w, ] - wf = connect_pipeline(wf, config, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) rpool.gather_pipes(wf, config) diff --git a/CPAC/network_centrality/pipeline.py b/CPAC/network_centrality/pipeline.py index e486f8eff0..407489fd9f 100644 --- a/CPAC/network_centrality/pipeline.py +++ b/CPAC/network_centrality/pipeline.py @@ -19,7 +19,7 @@ from CPAC.network_centrality.network_centrality import create_centrality_wf from CPAC.network_centrality.utils import check_centrality_params, create_merge_node from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.pipeline.schema import valid_options diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index 45337a0c23..04807755b7 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -37,8 +37,8 @@ TR_string_to_float, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.engine import 
ResourcePool -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.pipeline.engine.resource import StratPool from CPAC.registration.registration import ( apply_transform, warp_timeseries_to_EPItemplate, @@ -2363,7 +2363,7 @@ def erode_mask_WM(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["desc-confounds_timeseries", "censor-indices"], ) def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): - return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "bold") + return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "bold") @nodeblock( @@ -2407,40 +2407,22 @@ def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, op outputs=["desc-confounds_timeseries", "censor-indices"], ) def nuisance_regressors_generation_T1w(wf, cfg, strat_pool, pipe_num, opt=None): - return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "T1w") + return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "T1w") def nuisance_regressors_generation( wf: Workflow, cfg: Configuration, - strat_pool: ResourcePool, + strat_pool: StratPool, pipe_num: int, - opt: dict, space: Literal["T1w", "bold"], ) -> tuple[Workflow, dict]: - """Generate nuisance regressors. - - Parameters - ---------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - cfg : ~CPAC.utils.configuration.Configuration - - strat_pool : ~CPAC.pipeline.engine.ResourcePool - - pipe_num : int - - opt : dict - - space : str - T1w or bold - - Returns - ------- - wf : nipype.pipeline.engine.workflows.Workflow - - outputs : dict - """ + """Generate nuisance regressors.""" + try: + opt = strat_pool.regressor_dct + except LookupError: + # no regressors to generate + return wf, {} prefixes = [f"space-{space}_"] * 2 reg_tool = None if space == "T1w": @@ -2664,7 +2646,7 @@ def nuisance_regressors_generation( return (wf, outputs) -def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None): +def nuisance_regression(wf, cfg, strat_pool: StratPool, pipe_num, opt, space, res=None): """Nuisance regression in native (BOLD) or template space. 
Parameters @@ -2681,7 +2663,11 @@ def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None): outputs : dict """ - opt = strat_pool.regressor_dct(cfg) + try: + opt = strat_pool.regressor_dct + except LookupError: + # no regressors + return wf, {} bandpass = "Bandpass" in opt bandpass_before = ( bandpass diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index 40811b9e77..4e92fe7f45 100644 --- a/CPAC/pipeline/cpac_pipeline.py +++ b/CPAC/pipeline/cpac_pipeline.py @@ -25,12 +25,16 @@ import sys import time from time import strftime +from typing import Any import yaml -import nipype +import nipype # type: ignore [import-untyped] from nipype import config, logging -from flowdump import save_workflow_json, WorkflowJSONMeta -from indi_aws import aws_utils, fetch_creds +from flowdump import ( # type: ignore [import-untyped] + save_workflow_json, + WorkflowJSONMeta, +) +from indi_aws import aws_utils, fetch_creds # type: ignore [import-untyped] import CPAC from CPAC.alff.alff import alff_falff, alff_falff_space_template @@ -128,9 +132,8 @@ ) # pylint: disable=wrong-import-order -from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import check_outputs -from CPAC.pipeline.engine import initiate_rpool, NodeBlock +from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.nipype_pipeline_engine.plugins import ( LegacyMultiProcPlugin, MultiProcPlugin, @@ -197,12 +200,9 @@ from CPAC.utils.docs import version_report from CPAC.utils.monitoring import ( FMLOGGER, - getLogger, log_nodes_cb, log_nodes_initial, - LOGTAIL, set_up_logger, - WARNING_FREESURFER_OFF_WITH_DATA, WFLOGGER, ) from CPAC.utils.monitoring.draw_gantt_chart import resource_report @@ -422,7 +422,7 @@ def run_workflow( license_notice=CPAC.license_notice.replace("\n", "\n "), ), ) - subject_info = {} + subject_info: dict[str, Any] = {} subject_info["subject_id"] = subject_id subject_info["start_time"] = pipeline_start_time @@ -560,7 +560,7 @@ def run_workflow( # for strat_no, strat in enumerate(strat_list): # strat_label = 'strat_%d' % strat_no - # subject_info[strat_label] = strat.get_name() + # subject_info[strat_label] = strat.name # subject_info['resource_pool'].append(strat.get_resource_pool()) subject_info["status"] = "Running" @@ -710,21 +710,24 @@ def run_workflow( ] timeHeader = dict(zip(gpaTimeFields, gpaTimeFields)) - with open( - os.path.join( - c.pipeline_setup["log_directory"]["path"], - "cpac_individual_timing" - f"_{c.pipeline_setup['pipeline_name']}.csv", - ), - "a", - ) as timeCSV, open( - os.path.join( - c.pipeline_setup["log_directory"]["path"], - "cpac_individual_timing_%s.csv" - % c.pipeline_setup["pipeline_name"], - ), - "r", - ) as readTimeCSV: + with ( + open( + os.path.join( + c.pipeline_setup["log_directory"]["path"], + "cpac_individual_timing" + f"_{c.pipeline_setup['pipeline_name']}.csv", + ), + "a", + ) as timeCSV, + open( + os.path.join( + c.pipeline_setup["log_directory"]["path"], + "cpac_individual_timing_%s.csv" + % c.pipeline_setup["pipeline_name"], + ), + "r", + ) as readTimeCSV, + ): timeWriter = csv.DictWriter(timeCSV, fieldnames=gpaTimeFields) timeReader = csv.DictReader(readTimeCSV) @@ -853,24 +856,6 @@ def remove_workdir(wdpath: str) -> None: FMLOGGER.warning("Could not remove working directory %s", wdpath) -def initialize_nipype_wf(cfg, sub_data_dct, name=""): - """Initialize a new nipype workflow.""" - if name: - name = f"_{name}" - - workflow_name = ( - 
f'cpac{name}_{sub_data_dct["subject_id"]}_{sub_data_dct["unique_id"]}' - ) - wf = pe.Workflow(name=workflow_name) - wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] - wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": os.path.abspath(cfg.pipeline_setup["log_directory"]["path"]), - } - - return wf - - def load_cpac_pipe_config(pipe_config): """Load in pipeline config file.""" config_file = os.path.realpath(pipe_config) @@ -1071,7 +1056,6 @@ def build_T1w_registration_stack(rpool, cfg, pipeline_blocks=None): warp_wholeheadT1_to_template, warp_T1mask_to_template, ] - if not rpool.check_rpool("desc-restore-brain_T1w"): reg_blocks.append(correct_restore_brain_intensity_abcd) @@ -1128,99 +1112,10 @@ def build_segmentation_stack(rpool, cfg, pipeline_blocks=None): return pipeline_blocks -def list_blocks(pipeline_blocks, indent=None): - """List node blocks line by line. - - Parameters - ---------- - pipeline_blocks : list or tuple - - indent : int or None - number of spaces after a tab indent - - Returns - ------- - str - """ - blockstring = yaml.dump( - [ - getattr( - block, - "__name__", - getattr( - block, - "name", - yaml.safe_load(list_blocks(list(block))) - if isinstance(block, (tuple, list, set)) - else str(block), - ), - ) - for block in pipeline_blocks - ] - ) - if isinstance(indent, int): - blockstring = "\n".join( - [ - "\t" + " " * indent + line.replace("- - ", "- ") - for line in blockstring.split("\n") - ] - ) - return blockstring - - -def connect_pipeline(wf, cfg, rpool, pipeline_blocks): - """Connect the pipeline blocks to the workflow.""" - WFLOGGER.info( - "Connecting pipeline blocks:\n%s", list_blocks(pipeline_blocks, indent=1) - ) - - previous_nb = None - for block in pipeline_blocks: - try: - nb = NodeBlock(block, debug=cfg["pipeline_setup", "Debugging", "verbose"]) - wf = nb.connect_block(wf, cfg, rpool) - except LookupError as e: - if nb.name == "freesurfer_postproc": - WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) - LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA) - continue - previous_nb_str = ( - (f"after node block '{previous_nb.get_name()}':") - if previous_nb - else "at beginning:" - ) - # Alert user to block that raises error - if isinstance(block, list): - node_block_names = str([NodeBlock(b).get_name() for b in block]) - e.args = ( - f"When trying to connect one of the node blocks " - f"{node_block_names} " - f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", - ) - else: - node_block_names = NodeBlock(block).get_name() - e.args = ( - f"When trying to connect node block " - f"'{node_block_names}' " - f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", - ) - if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug(e.args[0]) - verbose_logger.debug(rpool) - raise - previous_nb = nb - - return wf - - def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): """Build a C-PAC workflow for a single subject.""" from CPAC.utils.datasource import gather_extraction_maps - # Workflow setup - wf = initialize_nipype_wf(cfg, sub_dict, name=pipeline_name) - # Extract credentials path if it exists try: creds_path = sub_dict["creds_path"] @@ -1244,8 +1139,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): # PREPROCESSING # """"""""""""""""""""""""""""""""""""""""""""""""""" - wf, rpool = initiate_rpool(wf, cfg, sub_dict) - + rpool = ResourcePool(cfg=cfg, data_paths=sub_dict, pipeline_name=pipeline_name) pipeline_blocks = 
build_anat_preproc_stack(rpool, cfg) # Anatomical to T1 template registration @@ -1437,7 +1331,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): if rpool.check_rpool(func): apply_func_warp["T1"] = False - target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"] + # target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"] target_space_alff = cfg.amplitude_low_frequency_fluctuation["target_space"] target_space_reho = cfg.regional_homogeneity["target_space"] @@ -1612,7 +1506,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): # Connect the entire pipeline! try: - wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks) except LookupError as lookup_error: missing_key = None errorstrings = [arg for arg in lookup_error.args[0].split("\n") if arg.strip()] diff --git a/CPAC/pipeline/engine.py b/CPAC/pipeline/engine.py deleted file mode 100644 index d7f53f7029..0000000000 --- a/CPAC/pipeline/engine.py +++ /dev/null @@ -1,2761 +0,0 @@ -# Copyright (C) 2021-2024 C-PAC Developers - -# This file is part of C-PAC. - -# C-PAC is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. - -# C-PAC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with C-PAC. If not, see . -import ast -import copy -import hashlib -from itertools import chain -import json -import os -import re -from typing import Optional -import warnings - -from nipype import config, logging -from nipype.interfaces.utility import Rename - -from CPAC.image_utils.spatial_smoothing import spatial_smoothing -from CPAC.image_utils.statistical_transforms import ( - fisher_z_score_standardize, - z_score_standardize, -) -from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.nodeblock import NodeBlockFunction -from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set -from CPAC.registration.registration import transform_derivative -from CPAC.resources.templates.lookup_table import lookup_identifier -from CPAC.utils.bids_utils import res_in_filename -from CPAC.utils.configuration import Configuration -from CPAC.utils.datasource import ( - create_anat_datasource, - create_func_datasource, - create_general_datasource, - ingress_func_metadata, - resolve_resolution, -) -from CPAC.utils.interfaces.datasink import DataSink -from CPAC.utils.interfaces.function import Function -from CPAC.utils.monitoring import ( - getLogger, - LOGTAIL, - WARNING_FREESURFER_OFF_WITH_DATA, - WFLOGGER, -) -from CPAC.utils.outputs import Outputs -from CPAC.utils.utils import ( - check_prov_for_regtool, - create_id_string, - get_last_prov_entry, - read_json, - write_output_json, -) - - -class ResourcePool: - def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None): - if not rpool: - self.rpool = {} - else: - self.rpool = rpool - - if not pipe_list: - self.pipe_list = [] - else: - self.pipe_list = pipe_list - - self.name = name - self.info = {} - - if cfg: - self.cfg = cfg - 
self.logdir = cfg.pipeline_setup["log_directory"]["path"] - - self.num_cpus = cfg.pipeline_setup["system_config"][ - "max_cores_per_participant" - ] - self.num_ants_cores = cfg.pipeline_setup["system_config"][ - "num_ants_threads" - ] - - self.ants_interp = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["ANTs_pipelines"]["interpolation"] - self.fsl_interp = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["FNIRT_pipelines"]["interpolation"] - - self.func_reg = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["run"] - - self.run_smoothing = ( - "smoothed" in cfg.post_processing["spatial_smoothing"]["output"] - ) - self.smoothing_bool = cfg.post_processing["spatial_smoothing"]["run"] - self.run_zscoring = "z-scored" in cfg.post_processing["z-scoring"]["output"] - self.zscoring_bool = cfg.post_processing["z-scoring"]["run"] - self.fwhm = cfg.post_processing["spatial_smoothing"]["fwhm"] - self.smooth_opts = cfg.post_processing["spatial_smoothing"][ - "smoothing_method" - ] - - self.xfm = [ - "alff", - "desc-sm_alff", - "desc-zstd_alff", - "desc-sm-zstd_alff", - "falff", - "desc-sm_falff", - "desc-zstd_falff", - "desc-sm-zstd_falff", - "reho", - "desc-sm_reho", - "desc-zstd_reho", - "desc-sm-zstd_reho", - ] - - def __repr__(self) -> str: - params = [ - f"{param}={getattr(self, param)}" - for param in ["rpool", "name", "cfg", "pipe_list"] - if getattr(self, param, None) is not None - ] - return f'ResourcePool({", ".join(params)})' - - def __str__(self) -> str: - if self.name: - return f"ResourcePool({self.name}): {list(self.rpool)}" - return f"ResourcePool: {list(self.rpool)}" - - def append_name(self, name): - self.name.append(name) - - def back_propogate_template_name( - self, wf, resource_idx: str, json_info: dict, id_string: "pe.Node" - ) -> None: - """Find and apply the template name from a resource's provenance. 
- - Parameters - ---------- - resource_idx : str - - json_info : dict - - id_string : pe.Node - - Returns - ------- - None - """ - if "template" in resource_idx and self.check_rpool("derivatives-dir"): - if self.check_rpool("template"): - node, out = self.get_data("template") - wf.connect(node, out, id_string, "template_desc") - elif "Template" in json_info: - id_string.inputs.template_desc = json_info["Template"] - elif ( - "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1 - ): - for resource in source_set(json_info["CpacProvenance"]): - source, value = resource.split(":", 1) - if value.startswith("template_") and source != "FSL-AFNI-bold-ref": - # 'FSL-AFNI-bold-ref' is currently allowed to be in - # a different space, so don't use it as the space for - # descendents - try: - anscestor_json = next(iter(self.rpool.get(source).items()))[ - 1 - ].get("json", {}) - if "Description" in anscestor_json: - id_string.inputs.template_desc = anscestor_json[ - "Description" - ] - return - except (IndexError, KeyError): - pass - return - - def get_name(self): - return self.name - - def check_rpool(self, resource): - if not isinstance(resource, list): - resource = [resource] - for name in resource: - if name in self.rpool: - return True - return False - - def get_pipe_number(self, pipe_idx): - return self.pipe_list.index(pipe_idx) - - def get_pool_info(self): - return self.info - - def set_pool_info(self, info_dct): - self.info.update(info_dct) - - def get_entire_rpool(self): - return self.rpool - - def get_resources(self): - return self.rpool.keys() - - def copy_rpool(self): - return ResourcePool( - rpool=copy.deepcopy(self.get_entire_rpool()), - name=self.name, - cfg=self.cfg, - pipe_list=copy.deepcopy(self.pipe_list), - ) - - @staticmethod - def get_raw_label(resource: str) -> str: - """Remove ``desc-*`` label.""" - for tag in resource.split("_"): - if "desc-" in tag: - resource = resource.replace(f"{tag}_", "") - break - return resource - - def get_strat_info(self, prov, label=None, logdir=None): - strat_info = {} - for entry in prov: - if isinstance(entry, list): - strat_info[entry[-1].split(":")[0]] = entry - elif isinstance(entry, str): - strat_info[entry.split(":")[0]] = entry.split(":")[1] - if label: - if not logdir: - logdir = self.logdir - WFLOGGER.info( - "\n\nPrinting out strategy info for %s in %s\n", label, logdir - ) - write_output_json( - strat_info, f"{label}_strat_info", indent=4, basedir=logdir - ) - - def set_json_info(self, resource, pipe_idx, key, val): - # TODO: actually should probably be able to inititialize resource/pipe_idx - if pipe_idx not in self.rpool[resource]: - msg = ( - "\n[!] DEV: The pipeline/strat ID does not exist " - f"in the resource pool.\nResource: {resource}" - f"Pipe idx: {pipe_idx}\nKey: {key}\nVal: {val}\n" - ) - raise Exception(msg) - if "json" not in self.rpool[resource][pipe_idx]: - self.rpool[resource][pipe_idx]["json"] = {} - self.rpool[resource][pipe_idx]["json"][key] = val - - def get_json_info(self, resource, pipe_idx, key): - # TODO: key checks - if not pipe_idx: - for pipe_idx, val in self.rpool[resource].items(): - return val["json"][key] - return self.rpool[resource][pipe_idx][key] - - @staticmethod - def get_resource_from_prov(prov): - # each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD - # data) has its own provenance list. 
the name of the resource, and - # the node that produced it, is always the last item in the provenance - # list, with the two separated by a colon : - if not len(prov): - return None - if isinstance(prov[-1], list): - return prov[-1][-1].split(":")[0] - if isinstance(prov[-1], str): - return prov[-1].split(":")[0] - return None - - def regressor_dct(self, cfg) -> dict: - """Return the regressor dictionary for the current strategy if one exists. - - Raises KeyError otherwise. - """ - # pylint: disable=attribute-defined-outside-init - if hasattr(self, "_regressor_dct"): # memoized - # pylint: disable=access-member-before-definition - return self._regressor_dct - key_error = KeyError( - "[!] No regressors in resource pool. \n\n" - "Try turning on create_regressors or " - "ingress_regressors." - ) - _nr = cfg["nuisance_corrections", "2-nuisance_regression"] - if not hasattr(self, "timeseries"): - if _nr["Regressors"]: - self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]} - else: - self.regressors = [] - if self.check_rpool("parsed_regressors"): # ingressed regressor - # name regressor workflow without regressor_prov - strat_name = _nr["ingress_regressors"]["Regressors"]["Name"] - if strat_name in self.regressors: - self._regressor_dct = self.regressors[strat_name] - return self._regressor_dct - self.regressor_dct = _nr["ingress_regressors"]["Regressors"] - return self.regressor_dct - prov = self.get_cpac_provenance("desc-confounds_timeseries") - strat_name_components = prov[-1].split("_") - for _ in list(range(prov[-1].count("_"))): - reg_name = "_".join(strat_name_components[-_:]) - if reg_name in self.regressors: - self._regressor_dct = self.regressors[reg_name] - return self._regressor_dct - raise key_error - - def set_data( - self, - resource, - node, - output, - json_info, - pipe_idx, - node_name, - fork=False, - inject=False, - ): - json_info = json_info.copy() - cpac_prov = [] - if "CpacProvenance" in json_info: - cpac_prov = json_info["CpacProvenance"] - current_prov_list = list(cpac_prov) - new_prov_list = list(cpac_prov) # <---- making a copy, it was already a list - if not inject: - new_prov_list.append(f"{resource}:{node_name}") - try: - res, new_pipe_idx = self.generate_prov_string(new_prov_list) - except IndexError: - msg = ( - f"\n\nThe set_data() call for {resource} has no " - "provenance information and should not be an " - "injection." 
- ) - raise IndexError(msg) - if not json_info: - json_info = { - "RawSources": [ - resource # <---- this will be repopulated to the full file path at the end of the pipeline building, in gather_pipes() - ] - } - json_info["CpacProvenance"] = new_prov_list - - if resource not in self.rpool.keys(): - self.rpool[resource] = {} - elif not fork: # <--- in the event of multiple strategies/options, this will run for every option; just keep in mind - search = False - if self.get_resource_from_prov(current_prov_list) == resource: - # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = self.generate_prov_string(current_prov_list)[1] - if pipe_idx not in self.rpool[resource].keys(): - search = True - else: - search = True - if search: - for idx in current_prov_list: - if self.get_resource_from_prov(idx) == resource: - if isinstance(idx, list): - # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = self.generate_prov_string(idx)[1] - elif isinstance(idx, str): - pipe_idx = idx - break - if pipe_idx in self.rpool[resource].keys(): - # in case the resource name is now new, and not the original - # remove old keys so we don't end up with a new strat for every new node unit (unless we fork) - del self.rpool[resource][pipe_idx] - if new_pipe_idx not in self.rpool[resource]: - self.rpool[resource][new_pipe_idx] = {} - if new_pipe_idx not in self.pipe_list: - self.pipe_list.append(new_pipe_idx) - - self.rpool[resource][new_pipe_idx]["data"] = (node, output) - self.rpool[resource][new_pipe_idx]["json"] = json_info - - def get( - self, - resource: list[str] | str, - pipe_idx: Optional[str] = None, - report_fetched: Optional[bool] = False, - optional: Optional[bool] = False, - ) -> tuple[Optional[dict], Optional[str]] | Optional[dict]: - # NOTE!!! - # if this is the main rpool, this will return a dictionary of strats, and inside those, are dictionaries like {'data': (node, out), 'json': info} - # BUT, if this is a sub rpool (i.e. a strat_pool), this will return a one-level dictionary of {'data': (node, out), 'json': info} WITHOUT THE LEVEL OF STRAT KEYS ABOVE IT - if not isinstance(resource, list): - resource = [resource] - # if a list of potential inputs are given, pick the first one found - for label in resource: - if label in self.rpool.keys(): - _found = self.rpool[label] - if pipe_idx: - _found = _found[pipe_idx] - if report_fetched: - return _found, label - return _found - if optional: - if report_fetched: - return (None, None) - return None - msg = ( - "\n\n[!] C-PAC says: None of the listed resources are in " - f"the resource pool:\n\n {resource}\n\nOptions:\n- You " - "can enable a node block earlier in the pipeline which " - "produces these resources. 
Check the 'outputs:' field in " - "a node block's documentation.\n- You can directly " - "provide this required data by pulling it from another " - "BIDS directory using 'source_outputs_dir:' in the " - "pipeline configuration, or by placing it directly in " - "your C-PAC output directory.\n- If you have done these, " - "and you still get this message, please let us know " - "through any of our support channels at: " - "https://fcp-indi.github.io/\n" - ) - raise LookupError(msg) - - def get_data( - self, resource, pipe_idx=None, report_fetched=False, quick_single=False - ): - if report_fetched: - if pipe_idx: - connect, fetched = self.get( - resource, pipe_idx=pipe_idx, report_fetched=report_fetched - ) - return (connect["data"], fetched) - connect, fetched = self.get(resource, report_fetched=report_fetched) - return (connect["data"], fetched) - if pipe_idx: - return self.get(resource, pipe_idx=pipe_idx)["data"] - if quick_single or len(self.get(resource)) == 1: - for _key, val in self.get(resource).items(): - return val["data"] - return self.get(resource)["data"] - - def copy_resource(self, resource, new_name): - try: - self.rpool[new_name] = self.rpool[resource] - except KeyError: - msg = f"[!] {resource} not in the resource pool." - raise Exception(msg) - - def update_resource(self, resource, new_name): - # move over any new pipe_idx's - self.rpool[new_name].update(self.rpool[resource]) - - def get_pipe_idxs(self, resource): - return self.rpool[resource].keys() - - def get_json(self, resource, strat=None): - # NOTE: resource_strat_dct has to be entered properly by the developer - # it has to either be rpool[resource][strat] or strat_pool[resource] - if strat: - resource_strat_dct = self.rpool[resource][strat] - else: - # for strat_pools mainly, where there is no 'strat' key level - resource_strat_dct = self.rpool[resource] - - # TODO: the below hits the exception if you use get_cpac_provenance on - # TODO: the main rpool (i.e. if strat=None) - if "json" in resource_strat_dct: - strat_json = resource_strat_dct["json"] - else: - msg = ( - "\n[!] Developer info: the JSON " - f"information for {resource} and {strat} " - f"is incomplete.\n" - ) - raise Exception(msg) - return strat_json - - def get_cpac_provenance(self, resource, strat=None): - # NOTE: resource_strat_dct has to be entered properly by the developer - # it has to either be rpool[resource][strat] or strat_pool[resource] - if isinstance(resource, list): - for _resource in resource: - try: - return self.get_cpac_provenance(_resource, strat) - except KeyError: - continue - json_data = self.get_json(resource, strat) - return json_data["CpacProvenance"] - - @staticmethod - def generate_prov_string(prov): - # this will generate a string from a SINGLE RESOURCE'S dictionary of - # MULTIPLE PRECEDING RESOURCES (or single, if just one) - # NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation) - if not isinstance(prov, list): - msg = ( - "\n[!] Developer info: the CpacProvenance " - f"entry for {prov} has to be a list.\n" - ) - raise TypeError(msg) - last_entry = get_last_prov_entry(prov) - resource = last_entry.split(":")[0] - return (resource, str(prov)) - - @staticmethod - def generate_prov_list(prov_str): - if not isinstance(prov_str, str): - msg = ( - "\n[!] 
Developer info: the CpacProvenance " - f"entry for {prov_str!s} has to be a string.\n" - ) - raise TypeError(msg) - return ast.literal_eval(prov_str) - - @staticmethod - def get_resource_strats_from_prov(prov): - # if you provide the provenance of a resource pool output, this will - # return a dictionary of all the preceding resource pool entries that - # led to that one specific output: - # {rpool entry}: {that entry's provenance} - # {rpool entry}: {that entry's provenance} - resource_strat_dct = {} - if isinstance(prov, str): - resource = prov.split(":")[0] - resource_strat_dct[resource] = prov - else: - for spot, entry in enumerate(prov): - if isinstance(entry, list): - resource = entry[-1].split(":")[0] - resource_strat_dct[resource] = entry - elif isinstance(entry, str): - resource = entry.split(":")[0] - resource_strat_dct[resource] = entry - return resource_strat_dct - - def flatten_prov(self, prov): - if isinstance(prov, str): - return [prov] - if isinstance(prov, list): - flat_prov = [] - for entry in prov: - if isinstance(entry, list): - flat_prov += self.flatten_prov(entry) - else: - flat_prov.append(entry) - return flat_prov - return None - - def get_strats(self, resources, debug=False): - # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS - # TODO: (and it doesn't have to be) - - import itertools - - linked_resources = [] - resource_list = [] - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("\nresources: %s", resources) - for resource in resources: - # grab the linked-input tuples - if isinstance(resource, tuple): - linked = [] - for label in list(resource): - rp_dct, fetched_resource = self.get( - label, report_fetched=True, optional=True - ) - if not rp_dct: - continue - linked.append(fetched_resource) - resource_list += linked - if len(linked) < 2: # noqa: PLR2004 - continue - linked_resources.append(linked) - else: - resource_list.append(resource) - - total_pool = [] - variant_pool = {} - len_inputs = len(resource_list) - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("linked_resources: %s", linked_resources) - verbose_logger.debug("resource_list: %s", resource_list) - for resource in resource_list: - ( - rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath - fetched_resource, - ) = self.get( - resource, - report_fetched=True, - optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be - ) - if not rp_dct: - len_inputs -= 1 - continue - sub_pool = [] - if debug: - verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) - for strat in rp_dct.keys(): - json_info = self.get_json(fetched_resource, strat) - cpac_prov = json_info["CpacProvenance"] - sub_pool.append(cpac_prov) - if fetched_resource not in variant_pool: - variant_pool[fetched_resource] = [] - if "CpacVariant" in json_info: - for key, val in json_info["CpacVariant"].items(): - if val not in variant_pool[fetched_resource]: - variant_pool[fetched_resource] += val - variant_pool[fetched_resource].append(f"NO-{val[0]}") - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool) - total_pool.append(sub_pool) - - if not total_pool: - raise LookupError( - "\n\n[!] 
C-PAC says: None of the listed " - "resources in the node block being connected " - "exist in the resource pool.\n\nResources:\n" - "%s\n\n" % resource_list - ) - - # TODO: right now total_pool is: - # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]], - # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]] - - # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.] - # TODO: and the actual resource is encoded in the tag: of the last item, every time! - # keying the strategies to the resources, inverting it - if len_inputs > 1: - strats = itertools.product(*total_pool) - - # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations. - # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}. - # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items. - new_strats = {} - - # get rid of duplicates - TODO: refactor .product - strat_str_list = [] - strat_list_list = [] - for strat_tuple in strats: - strat_list = list(copy.deepcopy(strat_tuple)) - strat_str = str(strat_list) - if strat_str not in strat_str_list: - strat_str_list.append(strat_str) - strat_list_list.append(strat_list) - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list)) - for strat_list in strat_list_list: - json_dct = {} - for strat in strat_list: - # strat is a prov list for a single resource/input - strat_resource, strat_idx = self.generate_prov_string(strat) - strat_json = self.get_json(strat_resource, strat=strat_idx) - json_dct[strat_resource] = strat_json - - drop = False - if linked_resources: - for linked in linked_resources: # <--- 'linked' is each tuple - if drop: - break - for xlabel in linked: - if drop: - break - xjson = copy.deepcopy(json_dct[xlabel]) - for ylabel in linked: - if xlabel == ylabel: - continue - yjson = copy.deepcopy(json_dct[ylabel]) - - if "CpacVariant" not in xjson: - xjson["CpacVariant"] = {} - if "CpacVariant" not in yjson: - yjson["CpacVariant"] = {} - - current_strat = [] - for key, val in xjson["CpacVariant"].items(): - if isinstance(val, list): - current_strat.append(val[0]) - else: - current_strat.append(val) - current_spread = list(set(variant_pool[xlabel])) - for spread_label in current_spread: - if "NO-" in spread_label: - continue - if spread_label not in current_strat: - current_strat.append(f"NO-{spread_label}") - - other_strat = [] - for key, val in yjson["CpacVariant"].items(): - if isinstance(val, list): - other_strat.append(val[0]) - else: - other_strat.append(val) - other_spread = list(set(variant_pool[ylabel])) - for spread_label in other_spread: - if "NO-" in spread_label: - continue - if spread_label not in other_strat: - other_strat.append(f"NO-{spread_label}") - - for variant in current_spread: - in_current_strat = False - in_other_strat = False - in_other_spread = False - - if variant is None: - in_current_strat = True - if None in other_spread: - in_other_strat = True - if variant in current_strat: - in_current_strat = True - if variant in other_strat: - 
in_other_strat = True - if variant in other_spread: - in_other_spread = True - - if not in_other_strat: - if in_other_spread: - if in_current_strat: - drop = True - break - - if in_other_strat: - if in_other_spread: - if not in_current_strat: - drop = True - break - if drop: - break - if drop: - continue - - # make the merged strat label from the multiple inputs - # strat_list is actually the merged CpacProvenance lists - pipe_idx = str(strat_list) - new_strats[pipe_idx] = ResourcePool() - # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! - # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = {} - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = strat_list - - # now just invert resource:strat to strat:resource for each resource:strat - for cpac_prov in strat_list: - resource, strat = self.generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][strat] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. - new_strats[pipe_idx].rpool[resource] = resource_strat_dct - # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. - self.pipe_list.append(pipe_idx) - if "CpacVariant" in resource_strat_dct["json"]: - if "CpacVariant" not in new_strats[pipe_idx].rpool["json"]: - new_strats[pipe_idx].rpool["json"]["CpacVariant"] = {} - for younger_resource, variant_list in resource_strat_dct[ - "json" - ]["CpacVariant"].items(): - if ( - younger_resource - not in new_strats[pipe_idx].rpool["json"]["CpacVariant"] - ): - new_strats[pipe_idx].rpool["json"]["CpacVariant"][ - younger_resource - ] = variant_list - # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) - ) - else: - new_strats = {} - for resource_strat_list in total_pool: - # total_pool will have only one list of strats, for the one input - for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs - resource, pipe_idx = self.generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][pipe_idx] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. - new_strats[pipe_idx] = ResourcePool( - rpool={resource: resource_strat_dct} - ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! 
- # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = resource_strat_dct["json"] - # TODO: WARNING- THIS IS A LEVEL HIGHER THAN THE ORIGINAL 'JSON' FOR EASE OF ACCESS IN CONNECT_BLOCK WITH THE .GET(JSON) - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = cpac_prov - # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) - ) - return new_strats - - def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): - if label in self.xfm: - json_info = dict(json_info) - - # get the bold-to-template transform from the current strat_pool info - xfm_idx = None - xfm_label = "from-bold_to-template_mode-image_xfm" - for entry in json_info["CpacProvenance"]: - if isinstance(entry, list): - if entry[-1].split(":")[0] == xfm_label: - xfm_prov = entry - xfm_idx = self.generate_prov_string(xfm_prov)[1] - break - - # but if the resource doesn't have the bold-to-template transform - # in its provenance/strategy, find the appropriate one for this - # current pipe_idx/strat - if not xfm_idx: - xfm_info = [] - for pipe_idx, entry in self.get(xfm_label).items(): - xfm_info.append((pipe_idx, entry["json"]["CpacProvenance"])) - else: - xfm_info = [(xfm_idx, xfm_prov)] - - for num, xfm_entry in enumerate(xfm_info): - xfm_idx, xfm_prov = xfm_entry - reg_tool = check_prov_for_regtool(xfm_prov) - - xfm = transform_derivative( - f"{label}_xfm_{pipe_x}_{num}", - label, - reg_tool, - self.num_cpus, - self.num_ants_cores, - ants_interp=self.ants_interp, - fsl_interp=self.fsl_interp, - opt=None, - ) - wf.connect(connection[0], connection[1], xfm, "inputspec.in_file") - - node, out = self.get_data("T1w-brain-template-deriv", quick_single=True) - wf.connect(node, out, xfm, "inputspec.reference") - - node, out = self.get_data( - "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx - ) - wf.connect(node, out, xfm, "inputspec.transform") - - label = f"space-template_{label}" - json_info["Template"] = self.get_json_info( - "T1w-brain-template-deriv", None, "Description" - ) - new_prov = json_info["CpacProvenance"] + xfm_prov - json_info["CpacProvenance"] = new_prov - new_pipe_idx = self.generate_prov_string(new_prov) - self.set_data( - label, - xfm, - "outputspec.out_file", - json_info, - new_pipe_idx, - f"{label}_xfm_{num}", - fork=True, - ) - - return wf - - @property - def filtered_movement(self) -> bool: - """ - Check if the movement parameters have been filtered in this strat_pool. - - Returns - ------- - bool - """ - try: - return "motion_estimate_filter" in str( - self.get_cpac_provenance("desc-movementParameters_motion") - ) - except KeyError: - # not a strat_pool or no movement parameters in strat_pool - return False - - def filter_name(self, cfg: Configuration) -> str: - """ - Return the name of the filter for this strategy. - - In a strat_pool with filtered movement parameters. 
- """ - motion_filters = cfg[ - "functional_preproc", - "motion_estimates_and_correction", - "motion_estimate_filter", - "filters", - ] - if len(motion_filters) == 1 and cfg.switch_is_on( - [ - "functional_preproc", - "motion_estimates_and_correction", - "motion_estimate_filter", - "run", - ], - exclusive=True, - ): - return motion_filters[0]["Name"] - try: - key = "motion" - sidecar = self.get_json("desc-movementParameters_motion") - except KeyError: - sidecar = None - if sidecar is not None and "CpacVariant" in sidecar: - if sidecar["CpacVariant"][key]: - return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1] - return "none" - - def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs): - input_type = "func_derivative" - - post_labels = [(label, connection[0], connection[1])] - - if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label): - # suffix: [eigenvector or degree] centrality [binarized or weighted] - # or lfcd [binarized or weighted] - mask = "template-specification-file" - elif "space-template" in label: - if "space-template_res-derivative_desc-bold_mask" in self.rpool.keys(): - mask = "space-template_res-derivative_desc-bold_mask" - else: - mask = "space-template_desc-bold_mask" - else: - mask = "space-bold_desc-brain_mask" - - mask_idx = None - for entry in json_info["CpacProvenance"]: - if isinstance(entry, list): - if entry[-1].split(":")[0] == mask: - mask_prov = entry - mask_idx = self.generate_prov_string(mask_prov)[1] - break - - if self.smoothing_bool: - if label in Outputs.to_smooth: - for smooth_opt in self.smooth_opts: - sm = spatial_smoothing( - f"{label}_smooth_{smooth_opt}_{pipe_x}", - self.fwhm, - input_type, - smooth_opt, - ) - wf.connect(connection[0], connection[1], sm, "inputspec.in_file") - node, out = self.get_data( - mask, pipe_idx=mask_idx, quick_single=mask_idx is None - ) - wf.connect(node, out, sm, "inputspec.mask") - - if "desc-" not in label: - if "space-" in label: - for tag in label.split("_"): - if "space-" in tag: - smlabel = label.replace(tag, f"{tag}_desc-sm") - break - else: - smlabel = f"desc-sm_{label}" - else: - for tag in label.split("_"): - if "desc-" in tag: - newtag = f"{tag}-sm" - smlabel = label.replace(tag, newtag) - break - - post_labels.append((smlabel, sm, "outputspec.out_file")) - - self.set_data( - smlabel, - sm, - "outputspec.out_file", - json_info, - pipe_idx, - f"spatial_smoothing_{smooth_opt}", - fork=True, - ) - self.set_data( - "fwhm", - sm, - "outputspec.fwhm", - json_info, - pipe_idx, - f"spatial_smoothing_{smooth_opt}", - fork=True, - ) - - if self.zscoring_bool: - for label_con_tpl in post_labels: - label = label_con_tpl[0] - connection = (label_con_tpl[1], label_con_tpl[2]) - if label in Outputs.to_zstd: - zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type) - - wf.connect(connection[0], connection[1], zstd, "inputspec.in_file") - - node, out = self.get_data(mask, pipe_idx=mask_idx) - wf.connect(node, out, zstd, "inputspec.mask") - - if "desc-" not in label: - if "space-template" in label: - new_label = label.replace( - "space-template", "space-template_desc-zstd" - ) - else: - new_label = f"desc-zstd_{label}" - else: - for tag in label.split("_"): - if "desc-" in tag: - newtag = f"{tag}-zstd" - new_label = label.replace(tag, newtag) - break - - post_labels.append((new_label, zstd, "outputspec.out_file")) - - self.set_data( - new_label, - zstd, - "outputspec.out_file", - json_info, - pipe_idx, - "zscore_standardize", - fork=True, - ) - - elif label 
in Outputs.to_fisherz: - zstd = fisher_z_score_standardize( - f"{label}_zstd_{pipe_x}", label, input_type - ) - - wf.connect( - connection[0], connection[1], zstd, "inputspec.correlation_file" - ) - - # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries' - oned = label.replace("correlations", "timeseries") - - node, out = outs[oned] - wf.connect(node, out, zstd, "inputspec.timeseries_oned") - - post_labels.append((new_label, zstd, "outputspec.out_file")) - - self.set_data( - new_label, - zstd, - "outputspec.out_file", - json_info, - pipe_idx, - "fisher_zscore_standardize", - fork=True, - ) - - return (wf, post_labels) - - def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): - excl = [] - substring_excl = [] - outputs_logger = getLogger(f'{cfg["subject_id"]}_expectedOutputs') - expected_outputs = ExpectedOutputs() - - if add_excl: - excl += add_excl - - if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]: - excl += Outputs.native_nonsmooth - excl += Outputs.template_nonsmooth - - if "raw" not in cfg.post_processing["z-scoring"]["output"]: - excl += Outputs.native_raw - excl += Outputs.template_raw - - if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]: - # substring_excl.append(['bold']) - excl += Outputs.debugging - - for resource in self.rpool.keys(): - if resource not in Outputs.any: - continue - - if resource in excl: - continue - - drop = False - for substring_list in substring_excl: - bool_list = [] - for substring in substring_list: - if substring in resource: - bool_list.append(True) - else: - bool_list.append(False) - for item in bool_list: - if not item: - break - else: - drop = True - if drop: - break - if drop: - continue - - subdir = "other" - if resource in Outputs.anat: - subdir = "anat" - # TODO: get acq- etc. - elif resource in Outputs.func: - subdir = "func" - # TODO: other stuff like acq- etc. - - for pipe_idx in self.rpool[resource]: - unique_id = self.get_name() - part_id = unique_id.split("_")[0] - ses_id = unique_id.split("_")[1] - - if "ses-" not in ses_id: - ses_id = f"ses-{ses_id}" - - out_dir = cfg.pipeline_setup["output_directory"]["path"] - pipe_name = cfg.pipeline_setup["pipeline_name"] - container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id) - filename = f"{unique_id}_{res_in_filename(self.cfg, resource)}" - - out_path = os.path.join(out_dir, container, subdir, filename) - - out_dct = { - "unique_id": unique_id, - "out_dir": out_dir, - "container": container, - "subdir": subdir, - "filename": filename, - "out_path": out_path, - } - self.rpool[resource][pipe_idx]["out"] = out_dct - - # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here. - # TODO: can do the pipeline_description.json variants here too! 
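# Illustrative sketch of the out_dct assembled above (all names are
# placeholders, not taken from a real run): outputs are written to
# <out_dir>/pipeline_<pipeline_name>/<part_id>/<ses_id>/<subdir>/<filename>.
import os

out_dir = "/output"
container = os.path.join("pipeline_cpac-default-pipeline", "sub-01", "ses-1")
filename = "sub-01_ses-1_desc-preproc_T1w"
out_path = os.path.join(out_dir, container, "anat", filename)
# -> /output/pipeline_cpac-default-pipeline/sub-01/ses-1/anat/sub-01_ses-1_desc-preproc_T1w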
- - for resource in self.rpool.keys(): - if resource not in Outputs.any: - continue - - if resource in excl: - continue - - drop = False - for substring_list in substring_excl: - bool_list = [] - for substring in substring_list: - if substring in resource: - bool_list.append(True) - else: - bool_list.append(False) - for item in bool_list: - if not item: - break - else: - drop = True - if drop: - break - if drop: - continue - - num_variant = 0 - if len(self.rpool[resource]) == 1: - num_variant = "" - all_jsons = [ - self.rpool[resource][pipe_idx]["json"] - for pipe_idx in self.rpool[resource] - ] - unlabelled = { - key - for json_info in all_jsons - for key in json_info.get("CpacVariant", {}).keys() - if key not in (*MOVEMENT_FILTER_KEYS, "regressors") - } - if "bold" in unlabelled: - all_bolds = list( - chain.from_iterable( - json_info["CpacVariant"]["bold"] - for json_info in all_jsons - if "CpacVariant" in json_info - and "bold" in json_info["CpacVariant"] - ) - ) - # not any(not) because all is overloaded as a parameter here - if not any( - not re.match( - r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold - ) - for _bold in all_bolds - ): - # this fork point should only result in 0 or 1 forks - unlabelled.remove("bold") - del all_bolds - all_forks = { - key: set( - chain.from_iterable( - json_info["CpacVariant"][key] - for json_info in all_jsons - if "CpacVariant" in json_info - and key in json_info["CpacVariant"] - ) - ) - for key in unlabelled - } - # del all_jsons - for key, forks in all_forks.items(): - if len(forks) < 2: # noqa: PLR2004 - # no int suffix needed if only one fork - unlabelled.remove(key) - # del all_forks - for pipe_idx in self.rpool[resource]: - pipe_x = self.get_pipe_number(pipe_idx) - json_info = self.rpool[resource][pipe_idx]["json"] - out_dct = self.rpool[resource][pipe_idx]["out"] - - try: - if unlabelled: - num_variant += 1 - except TypeError: - pass - - try: - del json_info["subjson"] - except KeyError: - pass - - if out_dct["subdir"] == "other" and not all: - continue - - unique_id = out_dct["unique_id"] - resource_idx = resource - - if isinstance(num_variant, int): - resource_idx, out_dct = name_fork( - resource_idx, cfg, json_info, out_dct - ) - if unlabelled: - if "desc-" in out_dct["filename"]: - for key in out_dct["filename"].split("_")[::-1]: - # final `desc` entity - if key.startswith("desc-"): - out_dct["filename"] = out_dct["filename"].replace( - key, f"{key}-{num_variant}" - ) - resource_idx = resource_idx.replace( - key, f"{key}-{num_variant}" - ) - break - else: - suff = resource.split("_")[-1] - newdesc_suff = f"desc-{num_variant}_{suff}" - resource_idx = resource_idx.replace(suff, newdesc_suff) - id_string = pe.Node( - Function( - input_names=[ - "cfg", - "unique_id", - "resource", - "scan_id", - "template_desc", - "atlas_id", - "fwhm", - "subdir", - "extension", - ], - output_names=["out_filename"], - function=create_id_string, - ), - name=f"id_string_{resource_idx}_{pipe_x}", - ) - id_string.inputs.cfg = self.cfg - id_string.inputs.unique_id = unique_id - id_string.inputs.resource = resource_idx - id_string.inputs.subdir = out_dct["subdir"] - - # grab the iterable scan ID - if out_dct["subdir"] == "func": - node, out = self.rpool["scan"]["['scan:func_ingress']"]["data"] - wf.connect(node, out, id_string, "scan_id") - - self.back_propogate_template_name( - wf, resource_idx, json_info, id_string - ) - # grab the FWHM if smoothed - for tag in resource.split("_"): - if "desc-" in tag and "-sm" in tag: - fwhm_idx = 
pipe_idx.replace(f"{resource}:", "fwhm:") - try: - node, out = self.rpool["fwhm"][fwhm_idx]["data"] - wf.connect(node, out, id_string, "fwhm") - except KeyError: - # smoothing was not done for this resource in the - # engine.py smoothing - pass - break - atlas_suffixes = ["timeseries", "correlations", "statmap"] - # grab the iterable atlas ID - atlas_id = None - if not resource.endswith("desc-confounds_timeseries"): - if resource.split("_")[-1] in atlas_suffixes: - atlas_idx = pipe_idx.replace(resource, "atlas_name") - # need the single quote and the colon inside the double - # quotes - it's the encoded pipe_idx - # atlas_idx = new_idx.replace(f"'{temp_rsc}:", - # "'atlas_name:") - if atlas_idx in self.rpool["atlas_name"]: - node, out = self.rpool["atlas_name"][atlas_idx]["data"] - wf.connect(node, out, id_string, "atlas_id") - elif "atlas-" in resource: - for tag in resource.split("_"): - if "atlas-" in tag: - atlas_id = tag.replace("atlas-", "") - id_string.inputs.atlas_id = atlas_id - else: - warnings.warn( - str( - LookupError( - "\n[!] No atlas ID found for " - f"{out_dct['filename']}.\n" - ) - ) - ) - nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}") - nii_name.inputs.keep_ext = True - - if resource in Outputs.ciftis: - nii_name.inputs.keep_ext = False - id_string.inputs.extension = Outputs.ciftis[resource] - else: - nii_name.inputs.keep_ext = True - - if resource in Outputs.giftis: - nii_name.inputs.keep_ext = False - id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii" - - else: - nii_name.inputs.keep_ext = True - - wf.connect(id_string, "out_filename", nii_name, "format_string") - - node, out = self.rpool[resource][pipe_idx]["data"] - try: - wf.connect(node, out, nii_name, "in_file") - except OSError as os_error: - WFLOGGER.warning(os_error) - continue - - write_json_imports = ["import os", "import json"] - write_json = pe.Node( - Function( - input_names=["json_data", "filename"], - output_names=["json_file"], - function=write_output_json, - imports=write_json_imports, - ), - name=f"json_{resource_idx}_{pipe_x}", - ) - write_json.inputs.json_data = json_info - - wf.connect(id_string, "out_filename", write_json, "filename") - ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}") - ds.inputs.parameterization = False - ds.inputs.base_directory = out_dct["out_dir"] - ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ - "s3_encryption" - ] - ds.inputs.container = out_dct["container"] - - if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: - ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ - "aws_output_bucket_credentials" - ] - expected_outputs += ( - out_dct["subdir"], - create_id_string( - self.cfg, - unique_id, - resource_idx, - template_desc=id_string.inputs.template_desc, - atlas_id=atlas_id, - subdir=out_dct["subdir"], - ), - ) - wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data') - wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json') - outputs_logger.info(expected_outputs) - - def node_data(self, resource, **kwargs): - """Create NodeData objects. 
- - Parameters - ---------- - resource : str - - Returns - ------- - NodeData - """ - return NodeData(self, resource, **kwargs) - - -class NodeBlock: - def __init__(self, node_block_functions, debug=False): - if not isinstance(node_block_functions, list): - node_block_functions = [node_block_functions] - - self.node_blocks = {} - - for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. - self.input_interface = [] - if isinstance(node_block_function, tuple): - self.input_interface = node_block_function[1] - node_block_function = node_block_function[0] - if not isinstance(self.input_interface, list): - self.input_interface = [self.input_interface] - - if not isinstance(node_block_function, NodeBlockFunction): - # If the object is a plain function `__name__` will be more useful than `str()` - obj_str = ( - node_block_function.__name__ - if hasattr(node_block_function, "__name__") - else str(node_block_function) - ) - msg = f'Object is not a nodeblock: "{obj_str}"' - raise TypeError(msg) - - name = node_block_function.name - self.name = name - self.node_blocks[name] = {} - - if self.input_interface: - for interface in self.input_interface: - for orig_input in node_block_function.inputs: - if isinstance(orig_input, tuple): - list_tup = list(orig_input) - if interface[0] in list_tup: - list_tup.remove(interface[0]) - list_tup.append(interface[1]) - node_block_function.inputs.remove(orig_input) - node_block_function.inputs.append(tuple(list_tup)) - elif orig_input == interface[0]: - node_block_function.inputs.remove(interface[0]) - node_block_function.inputs.append(interface[1]) - - for key, val in node_block_function.legacy_nodeblock_dict().items(): - self.node_blocks[name][key] = val - - self.node_blocks[name]["block_function"] = node_block_function - - # TODO: fix/replace below - self.outputs = {} - for out in node_block_function.outputs: - self.outputs[out] = None - - self.options = ["base"] - if node_block_function.outputs is not None: - self.options = node_block_function.outputs - - WFLOGGER.info("Connecting %s...", name) - if debug: - config.update_config({"logging": {"workflow_level": "DEBUG"}}) - logging.update_logging(config) - WFLOGGER.debug( - '"inputs": %s\n\t "outputs": %s%s', - node_block_function.inputs, - list(self.outputs.keys()), - f'\n\t"options": {self.options}' - if self.options != ["base"] - else "", - ) - config.update_config({"logging": {"workflow_level": "INFO"}}) - logging.update_logging(config) - - def get_name(self): - return self.name - - def check_null(self, val): - if isinstance(val, str): - val = None if val.lower() == "none" else val - return val - - def check_output(self, outputs, label, name): - if label not in outputs: - msg = ( - f'\n[!] Output name "{label}" in the block ' - "function does not match the outputs list " - f'{outputs} in Node Block "{name}"\n' - ) - raise NameError(msg) - - def grab_tiered_dct(self, cfg, key_list): - cfg_dct = cfg.dict() - for key in key_list: - try: - cfg_dct = cfg_dct.get(key, {}) - except KeyError as ke: - msg = "[!] 
The config provided to the node block is not valid" - raise KeyError(msg) from ke - return cfg_dct - - def connect_block(self, wf, cfg, rpool): - debug = cfg.pipeline_setup["Debugging"]["verbose"] - all_opts = [] - for name, block_dct in self.node_blocks.items(): - opts = [] - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - try: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) - except AttributeError as err: - msg = f"{err}\nNode Block: {name}" - raise Exception(msg) - - if opts is None: - opts = [opts] - - elif option_key and not option_val: - # enables multiple config forking entries - if not isinstance(option_key[0], list): - msg = ( - f"[!] The option_key field ({option_key}) " - f"for {name} exists but there is no " - "option_val.\n\nIf you are trying to " - "populate multiple option keys, the " - "option_val field must contain a list of " - "a list.\n" - ) - raise ValueError(msg) - for option_config in option_key: - # option_config is a list of pipe config levels down to the option - if config: - key_list = config + option_config - else: - key_list = option_config - option_val = option_config[-1] - if option_val in self.grab_tiered_dct(cfg, key_list[:-1]): - opts.append(option_val) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! - opts = [None] - all_opts += opts - - sidecar_additions = { - "CpacConfigHash": hashlib.sha1( - json.dumps(cfg.dict(), sort_keys=True).encode("utf-8") - ).hexdigest(), - "CpacConfig": cfg.dict(), - } - - if cfg["pipeline_setup"]["output_directory"].get("user_defined"): - sidecar_additions["UserDefined"] = cfg["pipeline_setup"][ - "output_directory" - ]["user_defined"] - - for name, block_dct in self.node_blocks.items(): - # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. 
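# Sketch of how grab_tiered_dct (above) walks nested pipeline-config keys;
# the key path and values are hypothetical, not a specific C-PAC entry.
cfg_dct = {"functional_preproc": {"motion": {"run": [True, False]}}}
key_list = ["functional_preproc", "motion", "run"]
val = cfg_dct
for key in key_list:
    val = val.get(key, {})
assert val == [True, False]  # a list like this is treated as a fork switch below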
- switch = self.check_null(block_dct["switch"]) - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - inputs = self.check_null(block_dct["inputs"]) - outputs = self.check_null(block_dct["outputs"]) - - block_function = block_dct["block_function"] - - opts = [] - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! - opts = [None] - # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples - if not opts: - # for node blocks where the options are split into different - # block functions - opts will be empty for non-selected - # options, and would waste the get_strats effort below - continue - - if not switch: - switch = [True] - else: - if config: - try: - key_list = config + switch - except TypeError as te: - msg = ( - "\n\n[!] Developer info: Docstring error " - f"for {name}, make sure the 'config' or " - "'switch' fields are lists.\n\n" - ) - raise TypeError(msg) from te - switch = self.grab_tiered_dct(cfg, key_list) - elif isinstance(switch[0], list): - # we have multiple switches, which is designed to only work if - # config is set to "None" - switch_list = [] - for key_list in switch: - val = self.grab_tiered_dct(cfg, key_list) - if isinstance(val, list): - # fork switches - if True in val: - switch_list.append(True) - if False in val: - switch_list.append(False) - else: - switch_list.append(val) - if False in switch_list: - switch = [False] - else: - switch = [True] - else: - # if config is set to "None" - key_list = switch - switch = self.grab_tiered_dct(cfg, key_list) - if not isinstance(switch, list): - switch = [switch] - if True in switch: - for ( - pipe_idx, - strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} - ) in rpool.get_strats(inputs, debug).items(): - # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } - fork = False in switch - for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. - # remember, you can get 'data' or 'json' from strat_pool with member functions - # strat_pool has all of the JSON information of all the inputs! - # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. - # particularly, our custom 'CpacProvenance' field. 
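# Sketch (placeholders only) of the CpacVariant bookkeeping applied further
# below when a block forks: each forked output records, per raw resource
# label, which node-block variant produced it.
new_json_info = {"CpacVariant": {}}
raw_label, variant_node = "bold", "bold_mask_afni"  # hypothetical fork
new_json_info["CpacVariant"].setdefault(raw_label, []).append(variant_node)
assert new_json_info == {"CpacVariant": {"bold": ["bold_mask_afni"]}}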
- node_name = name - pipe_x = rpool.get_pipe_number(pipe_idx) - - replaced_inputs = [] - for interface in self.input_interface: - if isinstance(interface[1], list): - for input_name in interface[1]: - if strat_pool.check_rpool(input_name): - break - else: - input_name = interface[1] - strat_pool.copy_resource(input_name, interface[0]) - replaced_inputs.append(interface[0]) - try: - wf, outs = block_function(wf, cfg, strat_pool, pipe_x, opt) - except IOError as e: # duplicate node - WFLOGGER.warning(e) - continue - - if not outs: - if block_function.__name__ == "freesurfer_postproc": - WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) - LOGTAIL["warnings"].append( - WARNING_FREESURFER_OFF_WITH_DATA - ) - continue - - if opt and len(option_val) > 1: - node_name = f"{node_name}_{opt}" - elif opt and "USER-DEFINED" in option_val: - node_name = f'{node_name}_{opt["Name"]}' - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("\n=======================") - verbose_logger.debug("Node name: %s", node_name) - prov_dct = rpool.get_resource_strats_from_prov( - ast.literal_eval(pipe_idx) - ) - for key, val in prov_dct.items(): - verbose_logger.debug("-------------------") - verbose_logger.debug("Input - %s:", key) - sub_prov_dct = rpool.get_resource_strats_from_prov(val) - for sub_key, sub_val in sub_prov_dct.items(): - sub_sub_dct = rpool.get_resource_strats_from_prov( - sub_val - ) - verbose_logger.debug(" sub-input - %s:", sub_key) - verbose_logger.debug(" prov = %s", sub_val) - verbose_logger.debug( - " sub_sub_inputs = %s", sub_sub_dct.keys() - ) - - for label, connection in outs.items(): - self.check_output(outputs, label, name) - new_json_info = copy.deepcopy(strat_pool.get("json")) - - # transfer over data-specific json info - # for example, if the input data json is _bold and the output is also _bold - data_type = label.split("_")[-1] - if data_type in new_json_info["subjson"]: - if ( - "SkullStripped" - in new_json_info["subjson"][data_type] - ): - new_json_info["SkullStripped"] = new_json_info[ - "subjson" - ][data_type]["SkullStripped"] - - # determine sources for the outputs, i.e. 
all input data into the node block - new_json_info["Sources"] = [ - x - for x in strat_pool.get_entire_rpool() - if x != "json" and x not in replaced_inputs - ] - - if isinstance(outputs, dict): - new_json_info.update(outputs[label]) - if "Description" not in outputs[label]: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - if "Template" in outputs[label]: - template_key = outputs[label]["Template"] - if template_key in new_json_info["Sources"]: - # only if the pipeline config template key is entered as the 'Template' field - # otherwise, skip this and take in the literal 'Template' string - try: - new_json_info["Template"] = new_json_info[ - "subjson" - ][template_key]["Description"] - except KeyError: - pass - try: - new_json_info["Resolution"] = new_json_info[ - "subjson" - ][template_key]["Resolution"] - except KeyError: - pass - else: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - - if "Description" in new_json_info: - new_json_info["Description"] = " ".join( - new_json_info["Description"].split() - ) - - for sidecar_key, sidecar_value in sidecar_additions.items(): - if sidecar_key not in new_json_info: - new_json_info[sidecar_key] = sidecar_value - - try: - del new_json_info["subjson"] - except KeyError: - pass - - if fork or len(opts) > 1 or len(all_opts) > 1: - if "CpacVariant" not in new_json_info: - new_json_info["CpacVariant"] = {} - raw_label = rpool.get_raw_label(label) - if raw_label not in new_json_info["CpacVariant"]: - new_json_info["CpacVariant"][raw_label] = [] - new_json_info["CpacVariant"][raw_label].append( - node_name - ) - - rpool.set_data( - label, - connection[0], - connection[1], - new_json_info, - pipe_idx, - node_name, - fork, - ) - - wf, post_labels = rpool.post_process( - wf, - label, - connection, - new_json_info, - pipe_idx, - pipe_x, - outs, - ) - - if rpool.func_reg: - for postlabel in post_labels: - connection = (postlabel[1], postlabel[2]) - wf = rpool.derivative_xfm( - wf, - postlabel[0], - connection, - new_json_info, - pipe_idx, - pipe_x, - ) - return wf - - -def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): - """Wrap a list of node block functions to use within other node blocks. - - Example usage: - - # This calls the 'bold_mask_afni' and 'bold_masking' node blocks to - # skull-strip an EPI field map, without having to invoke the NodeBlock - # connection system. - - # The interface dictionary tells wrap_block to set the EPI field map - # in the parent node block's throw-away strat_pool as 'bold', so that - # the 'bold_mask_afni' and 'bold_masking' node blocks will see that as - # the 'bold' input. - - # It also tells wrap_block to set the 'desc-brain_bold' output of - # the 'bold_masking' node block to 'opposite_pe_epi_brain' (what it - # actually is) in the parent node block's strat_pool, which gets - # returned. - - # Note 'bold' and 'desc-brain_bold' (all on the left side) are the - # labels that 'bold_mask_afni' and 'bold_masking' understand/expect - # through their interfaces and docstrings. - - # The right-hand side (the values of the 'interface' dictionary) are - # what 'make sense' within the current parent node block - in this - # case, the distortion correction node block dealing with field maps. 
- - interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), - 'desc-brain_bold': 'opposite_pe_epi_brain'} - wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], - interface, wf, cfg, strat_pool, - pipe_num, opt) - - ...further downstream in the parent node block: - - node, out = strat_pool.get_data('opposite_pe_epi_brain') - - # The above line will connect the output of the 'bold_masking' node - # block (which is the skull-stripped version of 'opposite_pe_epi') to - # the next node. - - """ - for block in node_blocks: - # new_pool = copy.deepcopy(strat_pool) - for in_resource, val in interface.items(): - if isinstance(val, tuple): - strat_pool.set_data( - in_resource, val[0], val[1], {}, "", "", fork=True - ) # - if "sub_num" not in strat_pool.get_pool_info(): - strat_pool.set_pool_info({"sub_num": 0}) - sub_num = strat_pool.get_pool_info()["sub_num"] - - wf, outputs = block(wf, cfg, strat_pool, f"{pipe_num}-{sub_num}", opt) # - for out, val in outputs.items(): - if out in interface and isinstance(interface[out], str): - strat_pool.set_data( - interface[out], outputs[out][0], outputs[out][1], {}, "", "" - ) - else: - strat_pool.set_data(out, outputs[out][0], outputs[out][1], {}, "", "") - sub_num += 1 - strat_pool.set_pool_info({"sub_num": sub_num}) - - return (wf, strat_pool) - - -def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - if "anat" not in data_paths: - WFLOGGER.warning("No anatomical data present.") - return rpool - - if "creds_path" not in data_paths: - data_paths["creds_path"] = None - - anat_flow = create_anat_datasource(f"anat_T1w_gather_{part_id}_{ses_id}") - - anat = {} - if isinstance(data_paths["anat"], str): - anat["T1"] = data_paths["anat"] - elif "T1w" in data_paths["anat"]: - anat["T1"] = data_paths["anat"]["T1w"] - - if "T1" in anat: - anat_flow.inputs.inputnode.set( - subject=part_id, - anat=anat["T1"], - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - img_type="anat", - ) - rpool.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") - - if "T2w" in data_paths["anat"]: - anat_flow_T2 = create_anat_datasource(f"anat_T2w_gather_{part_id}_{ses_id}") - anat_flow_T2.inputs.inputnode.set( - subject=part_id, - anat=data_paths["anat"]["T2w"], - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - img_type="anat", - ) - rpool.set_data("T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress") - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - - return rpool - - -def ingress_freesurfer(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - try: - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], part_id) - except KeyError: - WFLOGGER.warning("No FreeSurfer data present.") - return rpool - - # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) - if not os.path.exists(fs_path): - if "sub" in part_id: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], part_id.replace("sub-", "") - ) - else: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], ("sub-" + part_id) - ) - - # patch for flo-specific data - if not os.path.exists(fs_path): - subj_ses = part_id + "-" + ses_id - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], subj_ses) - if not os.path.exists(fs_path): - WFLOGGER.info("No FreeSurfer data found for subject %s", part_id) - return rpool - 
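# Sketch of the FreeSurfer directory fallbacks probed above (directory and
# IDs are placeholders); the double-nested layout is handled just below.
import os

freesurfer_dir, part_id, ses_id = "/fs", "sub-01", "ses-1"
stripped = part_id.replace("sub-", "") if "sub" in part_id else f"sub-{part_id}"
candidates = [
    os.path.join(freesurfer_dir, part_id),                # as configured
    os.path.join(freesurfer_dir, stripped),               # "sub-" prefix toggled
    os.path.join(freesurfer_dir, f"{part_id}-{ses_id}"),  # per-session patch
]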
- # Check for double nested subj names - if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): - fs_path = os.path.join(fs_path, part_id) - - fs_ingress = create_general_datasource("gather_freesurfer_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fs_path, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "freesurfer-subject-dir", - fs_ingress, - "outputspec.data", - {}, - "", - "freesurfer_config_ingress", - ) - - recon_outs = { - "pipeline-fs_raw-average": "mri/rawavg.mgz", - "pipeline-fs_subcortical-seg": "mri/aseg.mgz", - "pipeline-fs_brainmask": "mri/brainmask.mgz", - "pipeline-fs_wmparc": "mri/wmparc.mgz", - "pipeline-fs_T1": "mri/T1.mgz", - "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", - "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", - "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", - "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", - "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", - "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", - "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", - "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", - "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", - "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", - "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", - "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", - "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", - "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", - "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", - "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", - "pipeline-fs_xfm": "mri/transforms/talairach.lta", - } - - for key, outfile in recon_outs.items(): - fullpath = os.path.join(fs_path, outfile) - if os.path.exists(fullpath): - fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fullpath, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" - ) - else: - warnings.warn( - str(LookupError(f"\n[!] 
Path does not exist for {fullpath}.\n")) - ) - - return rpool - - -def ingress_raw_func_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - func_paths_dct = data_paths["func"] - - func_wf = create_func_datasource( - func_paths_dct, rpool, f"func_ingress_{part_id}_{ses_id}" - ) - func_wf.inputs.inputnode.set( - subject=part_id, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) - - rpool.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") - rpool.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") - rpool.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", func_wf, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - - # TODO: CHECK FOR PARAMETERS - - wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - wf, cfg, rpool, data_paths, part_id, data_paths["creds_path"], ses_id - ) - - # Memoize list of local functional scans - # TODO: handle S3 files - # Skip S3 files for now - - local_func_scans = [ - func_paths_dct[scan]["scan"] - for scan in func_paths_dct.keys() - if not func_paths_dct[scan]["scan"].startswith("s3://") - ] - if local_func_scans: - # pylint: disable=protected-access - wf._local_func_scans = local_func_scans - if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("local_func_scans: %s", local_func_scans) - del local_func_scans - - return (wf, rpool, diff, blip, fmap_rp_list) - - -def ingress_output_dir( - wf, cfg, rpool, unique_id, data_paths, part_id, ses_id, creds_path=None -): - dir_path = data_paths["derivatives_dir"] - - WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) - - anat = os.path.join(dir_path, "anat") - func = os.path.join(dir_path, "func") - - exts = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] - - outdir_anat = [] - outdir_func = [] - func_paths = {} - func_dict = {} - - for subdir in [anat, func]: - if os.path.isdir(subdir): - for filename in os.listdir(subdir): - for ext in exts: - if ext in filename: - if subdir == anat: - outdir_anat.append(os.path.join(subdir, filename)) - else: - outdir_func.append(os.path.join(subdir, filename)) - - # Add derivatives directory to rpool - ingress = create_general_datasource("gather_derivatives_dir") - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=dir_path, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "derivatives-dir", ingress, "outputspec.data", {}, "", "outdir_config_ingress" - ) - - for subdir in [outdir_anat, outdir_func]: - for filepath in subdir: - filename = str(filepath) - for ext in exts: - filename = filename.split("/")[-1].replace(ext, "") - - data_label = filename.split(unique_id)[1].lstrip("_") - - if len(filename) == len(data_label): - msg = ( - "\n\n[!] 
Possibly wrong participant or " - "session in this directory?\n\n" - f"Filepath: {filepath}\n\n" - ) - raise Exception(msg) - - bidstag = "" - for tag in data_label.split("_"): - for prefix in ["task-", "run-", "acq-", "rec"]: - if tag.startswith(prefix): - bidstag += f"{tag}_" - data_label = data_label.replace(f"{tag}_", "") - data_label, json = strip_template(data_label, dir_path, filename) - - rpool, json_info, pipe_idx, node_name, data_label = json_outdir_ingress( - rpool, filepath, exts, data_label, json - ) - - if ( - "template" in data_label - and not json_info["Template"] - == cfg.pipeline_setup["outdir_ingress"]["Template"] - ): - continue - # Rename confounds to avoid confusion in nuisance regression - if data_label.endswith("desc-confounds_timeseries"): - data_label = "pipeline-ingress_desc-confounds_timeseries" - - if len(bidstag) > 1: - # Remove tail symbol - bidstag = bidstag[:-1] - if bidstag.startswith("task-"): - bidstag = bidstag.replace("task-", "") - - # Rename bold mask for CPAC naming convention - # and to avoid collision with anat brain mask - if data_label.endswith("desc-brain_mask") and filepath in outdir_func: - data_label = data_label.replace("brain_mask", "bold_mask") - - try: - pipe_x = rpool.get_pipe_number(pipe_idx) - except ValueError: - pipe_x = len(rpool.pipe_list) - if filepath in outdir_anat: - ingress = create_general_datasource( - f"gather_anat_outdir_{data_label!s}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=filepath, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - data_label, - ingress, - "outputspec.data", - json_info, - pipe_idx, - node_name, - f"outdir_{data_label}_ingress", - inject=True, - ) - else: - if data_label.endswith("desc-preproc_bold"): - func_key = data_label - func_dict[bidstag] = {} - func_dict[bidstag]["scan"] = str(filepath) - func_dict[bidstag]["scan_parameters"] = json_info - func_dict[bidstag]["pipe_idx"] = pipe_idx - if data_label.endswith("desc-brain_mask"): - data_label = data_label.replace("brain_mask", "bold_mask") - try: - func_paths[data_label].append(filepath) - except: - func_paths[data_label] = [] - func_paths[data_label].append(filepath) - - if func_dict: - wf, rpool = func_outdir_ingress( - wf, - cfg, - func_dict, - rpool, - unique_id, - creds_path, - part_id, - func_key, - func_paths, - ) - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - return wf, rpool - - -def json_outdir_ingress(rpool, filepath, exts, data_label, json): - desc_val = None - for tag in data_label.split("_"): - if "desc-" in tag: - desc_val = tag - break - jsonpath = str(filepath) - for ext in exts: - jsonpath = jsonpath.replace(ext, "") - jsonpath = f"{jsonpath}.json" - - if not os.path.exists(jsonpath): - WFLOGGER.info( - "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", filepath, jsonpath - ) - json_info = { - "Description": "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." 
- } - json_info = {**json_info, **json} - write_output_json(json_info, jsonpath) - else: - json_info = read_json(jsonpath) - json_info = {**json_info, **json} - if "CpacProvenance" in json_info: - if desc_val: - # it's a C-PAC output, let's check for pipe_idx/strat integer - # suffixes in the desc- entries. - only_desc = str(desc_val) - - if only_desc[-1].isdigit(): - for idx in range(0, 3): - # let's stop at 3, please don't run >999 strategies okay? - if only_desc[-1].isdigit(): - only_desc = only_desc[:-1] - - if only_desc[-1] == "-": - only_desc = only_desc.rstrip("-") - else: - msg = ( - "\n[!] Something went wrong with either " - "reading in the output directory or when " - "it was written out previously.\n\nGive " - "this to your friendly local C-PAC " - f"developer:\n\n{data_label!s}\n" - ) - raise IOError(msg) - - # remove the integer at the end of the desc-* variant, we will - # get the unique pipe_idx from the CpacProvenance below - data_label = data_label.replace(desc_val, only_desc) - - # preserve cpac provenance/pipe_idx - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = "" - - else: - json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] - if "Description" not in json_info: - json_info["Description"] = ( - "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." - ) - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = f"{data_label}_ingress" - - return rpool, json_info, pipe_idx, node_name, data_label - - -def func_outdir_ingress( - wf, cfg, func_dict, rpool, unique_id, creds_path, part_id, key, func_paths -): - pipe_x = len(rpool.pipe_list) - ingress = create_func_datasource( - func_dict, rpool, f"gather_func_outdir_{key}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - subject=unique_id, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") - ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) - rpool.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") - - rpool.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", ingress, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - wf, cfg, rpool, func_dict, part_id, creds_path, key - ) - - # Have to do it this weird way to save the parsed BIDS tag & filepath - mask_paths_key = ( - "desc-bold_mask" - if "desc-bold_mask" in func_paths - else "space-template_desc-bold_mask" - ) - ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" - - # Connect func data with approproate scan name - iterables = pe.Node( - Function( - input_names=["scan", "mask_paths", "ts_paths"], - output_names=["out_scan", "mask", "confounds"], - function=set_iterables, - ), - name=f"set_iterables_{pipe_x}", - ) - iterables.inputs.mask_paths = func_paths[mask_paths_key] - iterables.inputs.ts_paths = func_paths[ts_paths_key] - wf.connect(ingress, "outputspec.scan", iterables, "scan") - - for key in func_paths: - if key in (mask_paths_key, ts_paths_key): - ingress_func = create_general_datasource(f"ingress_func_data_{key}") - ingress_func.inputs.inputnode.set( - unique_id=unique_id, - creds_path=creds_path, - 
dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") - if key == mask_paths_key: - wf.connect(iterables, "mask", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - elif key == ts_paths_key: - wf.connect(iterables, "confounds", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - - return wf, rpool - - -def set_iterables(scan, mask_paths=None, ts_paths=None): - # match scan with filepath to get filepath - mask_path = [path for path in mask_paths if scan in path] - ts_path = [path for path in ts_paths if scan in path] - - return (scan, mask_path[0], ts_path[0]) - - -def strip_template(data_label, dir_path, filename): - json = {} - # rename to template - for prefix in ["space-", "from-", "to-"]: - for bidstag in data_label.split("_"): - if bidstag.startswith(prefix): - template_key, template_val = bidstag.split("-") - template_name, _template_desc = lookup_identifier(template_val) - if template_name: - json["Template"] = template_val - data_label = data_label.replace(template_val, "template") - elif bidstag.startswith("res-"): - res_key, res_val = bidstag.split("-") - json["Resolution"] = res_val - data_label = data_label.replace(bidstag, "") - if data_label.find("__"): - data_label = data_label.replace("__", "_") - return data_label, json - - -def ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path=None): - # ingress config file paths - # TODO: may want to change the resource keys for each to include one level up in the YAML as well - - import pandas as pd - import pkg_resources as p - - template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") - template_df = pd.read_csv(template_csv, keep_default_na=False) - - for row in template_df.itertuples(): - key = row.Key - val = row.Pipeline_Config_Entry - val = cfg.get_nested(cfg, [x.lstrip() for x in val.split(",")]) - resolution = row.Intended_Resolution_Config_Entry - desc = row.Description - - if not val: - continue - - if resolution: - res_keys = [x.lstrip() for x in resolution.split(",")] - tag = res_keys[-1] - json_info = {} - - if "$FSLDIR" in val: - val = val.replace("$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"]) - if "$priors_path" in val: - priors_path = ( - cfg.segmentation["tissue_segmentation"]["FSL-FAST"]["use_priors"][ - "priors_path" - ] - or "" - ) - if "$FSLDIR" in priors_path: - priors_path = priors_path.replace( - "$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"] - ) - val = val.replace("$priors_path", priors_path) - if "${resolution_for_anat}" in val: - val = val.replace( - "${resolution_for_anat}", - cfg.registration_workflows["anatomical_registration"][ - "resolution_for_anat" - ], - ) - if "${func_resolution}" in val: - val = val.replace( - "${func_resolution}", - cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["output_resolution"][tag], - ) - - if desc: - template_name, _template_desc = lookup_identifier(val) - if template_name: - desc = f"{template_name} - {desc}" - json_info["Description"] = f"{desc} - {val}" - if resolution: - resolution = cfg.get_nested(cfg, res_keys) - json_info["Resolution"] = resolution - - resampled_template = pe.Node( - Function( - input_names=["resolution", "template", "template_name", "tag"], - output_names=["resampled_template"], - function=resolve_resolution, - as_module=True, - ), - 
name="resampled_" + key, - ) - - resampled_template.inputs.resolution = resolution - resampled_template.inputs.template = val - resampled_template.inputs.template_name = key - resampled_template.inputs.tag = tag - - # the set_data below is set up a little differently, because we are - # injecting and also over-writing already-existing entries - # other alternative would have been to ingress into the - # resampled_template node from the already existing entries, but we - # didn't do that here - rpool.set_data( - key, - resampled_template, - "resampled_template", - json_info, - "", - "template_resample", - ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! because you're not connecting this the regular way, you have to do it manually - - elif val: - config_ingress = create_general_datasource(f"gather_{key}") - config_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=val, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, - config_ingress, - "outputspec.data", - json_info, - "", - f"{key}_config_ingress", - ) - # templates, resampling from config - """ - template_keys = [ - ("anat", ["network_centrality", "template_specification_file"]), - ("anat", ["nuisance_corrections", "2-nuisance_regression", - "lateral_ventricles_mask"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "CSF_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "GM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "WM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "CSF"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "GRAY"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "WHITE"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_brain_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_brain_ACPC_template"])] - - def get_nested_attr(c, template_key): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _get_nested(attr, keys): - if len(keys) > 1: - return (_get_nested(attr[keys[0]], keys[1:])) - elif len(keys): - return (attr[keys[0]]) - else: - return (attr) - - return (_get_nested(attr, keys)) - - def set_nested_attr(c, template_key, value): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _set_nested(attr, keys): - if len(keys) > 1: - return (_set_nested(attr[keys[0]], keys[1:])) - elif len(keys): - attr[keys[0]] = value - else: - return (attr) - - return (_set_nested(attr, keys)) - - for key_type, key in template_keys: - attr = cfg.get_nested(cfg, key) - if isinstance(attr, str) or attr == None: - node = create_check_for_s3_node( - key[-1], - attr, key_type, - data_paths['creds_path'], - cfg.pipeline_setup['working_directory']['path'], - map_node=False - ) - cfg.set_nested(cfg, key, node) - - template_keys_in_list = [ - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_brain_list"]), - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_segmentation_list"]), - ] - - for key_type, key in template_keys_in_list: - node = create_check_for_s3_node( - key[-1], - cfg.get_nested(cfg, key), key_type, - data_paths['creds_path'], - 
cfg.pipeline_setup['working_directory']['path'], - map_node=True - ) - cfg.set_nested(cfg, key, node) - """ - - return rpool - - -def initiate_rpool(wf, cfg, data_paths=None, part_id=None): - """ - Initialize a new ResourcePool. - - data_paths format: - {'anat': { - 'T1w': '{T1w path}', - 'T2w': '{T2w path}' - }, - 'creds_path': {None OR path to credentials CSV}, - 'func': { - '{scan ID}': - { - 'scan': '{path to BOLD}', - 'scan_parameters': {scan parameter dictionary} - } - }, - 'site_id': 'site-ID', - 'subject_id': 'sub-01', - 'unique_id': 'ses-1', - 'derivatives_dir': '{derivatives_dir path}'} - """ - # TODO: refactor further, integrate with the ingress_data functionality - # TODO: used for BIDS-Derivatives (below), and possible refactoring of - # TODO: the raw data config to use 'T1w' label instead of 'anat' etc. - - if data_paths: - part_id = data_paths["subject_id"] - ses_id = data_paths["unique_id"] - if "creds_path" not in data_paths: - creds_path = None - else: - creds_path = data_paths["creds_path"] - unique_id = f"{part_id}_{ses_id}" - - elif part_id: - unique_id = part_id - creds_path = None - - rpool = ResourcePool(name=unique_id, cfg=cfg) - - if data_paths: - # ingress outdir - try: - if ( - data_paths["derivatives_dir"] - and cfg.pipeline_setup["outdir_ingress"]["run"] - ): - wf, rpool = ingress_output_dir( - wf, - cfg, - rpool, - unique_id, - data_paths, - part_id, - ses_id, - creds_path=None, - ) - except: - rpool = ingress_raw_anat_data( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - if "func" in data_paths: - wf, rpool, diff, blip, fmap_rp_list = ingress_raw_func_data( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - - # grab any file paths from the pipeline config YAML - rpool = ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path) - - # output files with 4 different scans - - return (wf, rpool) - - -def run_node_blocks(blocks, data_paths, cfg=None): - import os - - from CPAC.pipeline import nipype_pipeline_engine as pe - from CPAC.pipeline.engine import NodeBlock - - if not cfg: - cfg = { - "pipeline_setup": { - "working_directory": {"path": os.getcwd()}, - "log_directory": {"path": os.getcwd()}, - } - } - - # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!! - _, rpool = initiate_rpool(cfg, data_paths) - - wf = pe.Workflow(name="node_blocks") - wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] - wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": cfg.pipeline_setup["log_directory"]["path"], - } - - run_blocks = [] - if rpool.check_rpool("desc-preproc_T1w"): - WFLOGGER.info("Preprocessed T1w found, skipping anatomical preprocessing.") - else: - run_blocks += blocks[0] - if rpool.check_rpool("desc-preproc_bold"): - WFLOGGER.info("Preprocessed BOLD found, skipping functional preprocessing.") - else: - run_blocks += blocks[1] - - for block in run_blocks: - wf = NodeBlock( - block, debug=cfg["pipeline_setup", "Debugging", "verbose"] - ).connect_block(wf, cfg, rpool) - rpool.gather_pipes(wf, cfg) - - wf.run() - - -class NodeData: - r"""Attribute access for ResourcePool.get_data outputs. - - Class to hold outputs of CPAC.pipeline.engine.ResourcePool().get_data(), so one can - do ``node_data = strat_pool.node_data(resource)`` and have ``node_data.node`` and - ``node_data.out`` instead of doing ``node, out = strat_pool.get_data(resource)`` - and needing two variables (``node`` and ``out``) to store that information. 
- - Also includes ``variant`` attribute providing the resource's self-keyed value - within its ``CpacVariant`` dictionary. - - Examples - -------- - >>> rp = ResourcePool() - >>> rp.node_data(None) - NotImplemented (NotImplemented) - - >>> rp.set_data('test', - ... pe.Node(Function(input_names=[]), 'test'), - ... 'b', [], 0, 'test') - >>> rp.node_data('test') - test (b) - >>> rp.node_data('test').out - 'b' - - >>> try: - ... rp.node_data('b') - ... except LookupError as lookup_error: - ... print(str(lookup_error).strip().split('\n')[0].strip()) - [!] C-PAC says: None of the listed resources are in the resource pool: - """ - - # pylint: disable=too-few-public-methods - def __init__(self, strat_pool=None, resource=None, **kwargs): - self.node = NotImplemented - self.out = NotImplemented - if strat_pool is not None and resource is not None: - self.node, self.out = strat_pool.get_data(resource, **kwargs) - - def __repr__(self): # noqa: D105 - return f'{getattr(self.node, "name", str(self.node))} ({self.out})' diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py new file mode 100644 index 0000000000..534c9f7450 --- /dev/null +++ b/CPAC/pipeline/engine/__init__.py @@ -0,0 +1,26 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""C-PAC engine.""" + +from .nodeblock import NodeBlock +from .resource import ResourcePool, StratPool + +__all__ = [ + "NodeBlock", + "ResourcePool", + "StratPool", +] diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py new file mode 100644 index 0000000000..e68bfbf0d2 --- /dev/null +++ b/CPAC/pipeline/engine/nodeblock.py @@ -0,0 +1,349 @@ +# Copyright (C) 2023-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
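# With the engine split into a package, the classes re-exported by
# CPAC/pipeline/engine/__init__.py (above) import from the package root;
# the decorator name below is assumed to remain importable from this submodule.
from CPAC.pipeline.engine import NodeBlock, ResourcePool, StratPool
from CPAC.pipeline.engine.nodeblock import nodeblock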
+"""Classes and decorator for :py:class:`NodeBlock` s and :py:class:`NodeBlockFunction` s.""" + +from typing import Any, Callable, Optional, TYPE_CHECKING + +import yaml +from nipype import config, logging # type: ignore [import-untyped] +from nipype.pipeline.engine import Workflow # type: ignore[import-untyped] + +from CPAC.utils.configuration.configuration import Configuration +from CPAC.utils.monitoring import ( + WFLOGGER, +) + +if TYPE_CHECKING: + from CPAC.pipeline.engine.resource import ResourceData, StratPool + +NODEBLOCK_INPUTS = list[str | list | tuple] +NODEBLOCK_OUTPUTS = list[str] | dict[str, Any] +PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"] + + +class NodeBlockFunction: + """Store a reference to the nodeblock function and all of its metadata.""" + + def __init__( + self, + func: Callable, + name: str, + config: Optional[list[str]] = None, + switch: Optional[list[str] | list[list[str]]] = None, + option_key: Optional[str | list[str]] = None, + option_val: Optional[str | list[str]] = None, + inputs: Optional[NODEBLOCK_INPUTS] = None, + outputs: Optional[NODEBLOCK_OUTPUTS] = None, + ) -> None: + self.func = func + """:py:class:`Nodeblock` function reference.""" + self.name: str = name + """Used in the graph and logging to identify the :py:class:`NodeBlock` and its + component :py:class:`~nipype.pipeline.engine.Node` s.""" + self.config: Optional[list[str]] = config + """ + Indicates the nested keys in a C-PAC pipeline + :py:class:`~CPAC.utils.configuration.Configuration` should configure a + :py:class:`NodeBlock` built from this function. If `config` is set to ``None``, + then all other :py:class:`~CPAC.utils.configuration.Configuration` -related + entities must be specified from the root of the :py:class:`~CPAC.utils.configuration.Configuration` . + """ + self.switch: Optional[list[str] | list[list[str]]] = switch + """ + Indicates any keys that should evaluate to ``True`` for this :py:class:`NodeBlock` + to be active. A list of lists of strings indicates multiple `switch` es + that must all be ``True`` to run, and is currently only an option if `config` is + set to ``None``. + """ + self.option_key: Optional[str | list[str]] = option_key + """ + Indicates the nested keys (starting at the nested key indicated by `config`) + that should configure this :py:class:`NodeBlock`. + """ + self.option_val: Optional[str | list[str]] = option_val + """Indicates values for which this :py:class:`NodeBlock` should be active.""" + self.inputs: list[str | list | tuple] = inputs if inputs else [] + """:py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating + resources needed for the :py:class:`NodeBlock`'s functionality.""" + self.outputs: list[str] | dict[str, Any] = outputs if outputs else [] + """ + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating + resources generated or updated by the :py:class:`NodeBlock`, optionally + including metadata for the outputs' respective sidecars. 
+ """ + + # Forward function attributes similar to functools.update_wrapper: + # https://docs.python.org/3/library/functools.html#functools.update_wrapper + self.__module__ = func.__module__ + self.__name__ = func.__name__ + self.__qualname__ = func.__qualname__ + self.__annotations__ = func.__annotations__ + self.__doc__ = "".join( + [ + _.replace(" ", "") + for _ in [func.__doc__, "", "", NodeBlockFunction.__call__.__doc__] + if _ is not None + ] + ).rstrip() + + def __call__( + self, + wf: Workflow, + cfg: Configuration, + strat_pool: "StratPool", + pipe_num: Optional[int | str], + opt: Optional[str] = None, + ) -> tuple[Workflow, dict[str, "ResourceData"]]: + """Call a :py:class:`NodeBlockFunction`. + + All :py:class:`NodeBlockFunction` s have the same signature. + """ + return self.func(wf, cfg, strat_pool, pipe_num, opt) + + def legacy_nodeblock_dict(self): + """Return :py:class:`NodeBlock` metadata as a dictionary. + + Helper for compatibility reasons. + """ + return { + "name": self.name, + "config": self.config, + "switch": self.switch, + "option_key": self.option_key, + "option_val": self.option_val, + "inputs": self.inputs, + "outputs": self.outputs, + } + + def __repr__(self) -> str: + """Return reproducible string representation of a :py:class:`NodeBlockFunction`.""" + return ( + f"NodeBlockFunction({self.func.__module__}." + f'{self.func.__name__}, "{self.name}", ' + f"config={self.config}, switch={self.switch}, " + f"option_key={self.option_key}, option_val=" + f"{self.option_val}, inputs={self.inputs}, " + f"outputs={self.outputs})" + ) + + def __str__(self) -> str: + """Return string representation of a :py:class:`NodeBlockFunction`.""" + return f"NodeBlockFunction({self.name})" + + +class NodeBlock: + """A :py:class:`~nipype.pipeline.engine.Workflow` subgraph composed of :py:class:`NodeBlockFunction` s.""" + + def __init__( + self, + node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS, + debug: bool = False, + ) -> None: + """Create a :py:class:`NodeBlock` from a list of :py:class:`NodeBlockFunction` s.""" + if not isinstance(node_block_functions, list): + node_block_functions = [node_block_functions] + + self.node_blocks: dict[str, Any] = {} + + for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. 
+ self.input_interface = [] + if isinstance(node_block_function, tuple): + self.input_interface = node_block_function[1] + node_block_function = node_block_function[0] # noqa: PLW2901 + if not isinstance(self.input_interface, list): + self.input_interface = [self.input_interface] + + if not isinstance(node_block_function, NodeBlockFunction): + # If the object is a plain function `__name__` will be more useful than `str()` + obj_str = ( + node_block_function.__name__ # type: ignore [attr-defined] + if hasattr(node_block_function, "__name__") + else str(node_block_function) + ) + msg = f'Object is not a nodeblock: "{obj_str}"' + raise TypeError(msg) + + name = node_block_function.name + self.name = name + self.node_blocks[name] = {} + + if self.input_interface: + for interface in self.input_interface: + for orig_input in node_block_function.inputs: + if isinstance(orig_input, tuple): + list_tup = list(orig_input) + if interface[0] in list_tup: + list_tup.remove(interface[0]) + list_tup.append(interface[1]) + node_block_function.inputs.remove(orig_input) + node_block_function.inputs.append(tuple(list_tup)) + elif orig_input == interface[0]: + node_block_function.inputs.remove(interface[0]) + node_block_function.inputs.append(interface[1]) + + for key, val in node_block_function.legacy_nodeblock_dict().items(): + self.node_blocks[name][key] = val + + self.node_blocks[name]["block_function"] = node_block_function + + # TODO: fix/replace below + self.outputs: dict[str, Optional[str]] = {} + for out in node_block_function.outputs: + self.outputs[out] = None + + self.options: list[str] | dict[str, Any] = ["base"] + if node_block_function.outputs is not None: + self.options = node_block_function.outputs + + WFLOGGER.info("Connecting %s...", name) + if debug: + config.update_config({"logging": {"workflow_level": "DEBUG"}}) + logging.update_logging(config) + WFLOGGER.debug( + '"inputs": %s\n\t "outputs": %s%s', + node_block_function.inputs, + list(self.outputs.keys()), + f'\n\t"options": {self.options}' + if self.options != ["base"] + else "", + ) + config.update_config({"logging": {"workflow_level": "INFO"}}) + logging.update_logging(config) + + def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> None: + """Check if a label is listed in a :py:class:`NodeBlock` 's `outputs`. + + Raises + ------ + NameError + If a mismatch is found. + """ + if label not in outputs: + msg = ( + f'\n[!] Output name "{label}" in the block ' + "function does not match the outputs list " + f'{outputs} in Node Block "{name}"\n' + ) + raise NameError(msg) + + @staticmethod + def list_blocks( + pipeline_blocks: PIPELINE_BLOCKS, indent: Optional[int] = None + ) -> str: + """List :py:class:`NodeBlockFunction` s line by line. 
+ + Parameters + ---------- + pipeline_blocks + list of :py:class:`NodeBlockFunction` s + + indent + number of spaces after a tab indent + + Returns + ------- + str + formatted list of :py:class:`NodeBlockFunction` s + """ + blockstring = yaml.dump( + [ + getattr( + block, + "__name__", + getattr( + block, + "name", + yaml.safe_load(NodeBlock.list_blocks(list(block))) + if isinstance(block, (tuple, list, set)) + else str(block), + ), + ) + for block in pipeline_blocks + ] + ) + if isinstance(indent, int): + blockstring = "\n".join( + [ + "\t" + " " * indent + line.replace("- - ", "- ") + for line in blockstring.split("\n") + ] + ) + return blockstring + + +def nodeblock( + name: Optional[str] = None, + config: Optional[list[str]] = None, + switch: Optional[list[str] | list[list[str]]] = None, + option_key: Optional[str | list[str]] = None, + option_val: Optional[str | list[str]] = None, + inputs: Optional[NODEBLOCK_INPUTS] = None, + outputs: Optional[list[str] | dict[str, Any]] = None, +): + """Define a :py:class:`NodeBlockFunction` . + + Connections to the pipeline :py:class:`~CPAC.utils.configuration.Configuration` and to other :py:class:`NodeBlockFunction` s. + + Parameters + ---------- + name + Used in the graph and logging to identify the :py:class:`NodeBlock` and its + component :py:class:`~nipype.pipeline.engine.Node` s. + The :py:class:`NodeBlockFunction`'s `.__name__` is used if `name` is not + provided. + + config + Indicates the nested keys in a C-PAC pipeline + :py:class:`~CPAC.pipeline.configuration.Configuration` should configure a + :py:class:`NodeBlock` built from this :py:class:`NodeBlockFunction`. If `config` + is set to ``None``, then all other + :py:class:`~CPAC.pipeline.configuration.Configuration` -related entities + must be specified from the root of the + :py:class:`~CPAC.pipeline.configuration.Configuration` . + + switch + Indicates any keys that should evaluate to ``True`` for this + :py:class:`NodeBlock` to be active. A list of lists of strings indicates + multiple switches that must all be ``True`` to run, and is currently only an + option if config is set to ``None``. + + option_key + Indicates the nested keys (starting at the nested key indicated by `config`) + that should configure this :py:class:`NodeBlock`. + + option_val + Indicates values for which this :py:class:`NodeBlock` should be active. + + inputs + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files needed for the :py:class:`NodeBlock` 's + functionality. + + outputs + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files + generated or updated by the :py:class:`NodeBlock`, optionally including metadata + for the `outputs` ' respective sidecars. + """ + return lambda func: NodeBlockFunction( + func, + name if name is not None else func.__name__, + config, + switch, + option_key, + option_val, + inputs, + outputs, + ) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py new file mode 100644 index 0000000000..988d4bdc04 --- /dev/null +++ b/CPAC/pipeline/engine/resource.py @@ -0,0 +1,3261 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. 
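Reviewer note on the `nodeblock` decorator defined at the end of `nodeblock.py` above: a minimal usage sketch, assuming hypothetical config keys, inputs, and outputs. The `(wf, cfg, strat_pool, pipe_num, opt)` signature and the `(wf, outputs)` return shape follow `NodeBlockFunction.__call__`; everything else is illustrative and not part of this diff.

    from CPAC.pipeline.engine.nodeblock import nodeblock

    @nodeblock(
        name="example_smoothing",                          # hypothetical block name
        config=["post_processing", "spatial_smoothing"],   # nested pipeline-config keys
        switch=["run"],                                    # must evaluate to True to run
        inputs=["desc-preproc_bold"],                      # resources pulled from the pool
        outputs=["desc-sm_bold"],                          # resources this block produces
    )
    def example_smoothing(wf, cfg, strat_pool, pipe_num, opt=None):
        # fetch the (node, out) pair for the requested resource from the StratPool
        node, out = strat_pool.get_data("desc-preproc_bold")
        # ... build and connect the actual smoothing nodes here ...
        outputs = {"desc-sm_bold": (node, out)}  # placeholder wiring for illustration
        return wf, outputs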
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +""":py:class:`Resource` s and :py:class:`ResourcePool` s for C-PAC.""" + +import ast +from collections.abc import KeysView +from copy import deepcopy +import hashlib +from itertools import chain +import json +import os +from pathlib import Path +import re +from types import NoneType +from typing import Any, Literal, NamedTuple, Optional, overload + +from nipype.interfaces import utility as util # type: ignore [import-untyped] +from nipype.interfaces.utility import Rename # type: ignore [import-untyped] +from nipype.pipeline import engine as pe # type: ignore [import-untyped] + +from CPAC.image_utils.spatial_smoothing import spatial_smoothing +from CPAC.image_utils.statistical_transforms import ( + fisher_z_score_standardize, + z_score_standardize, +) +from CPAC.pipeline.check_outputs import ExpectedOutputs +from CPAC.pipeline.engine.nodeblock import ( + NodeBlock, + NODEBLOCK_INPUTS, + NODEBLOCK_OUTPUTS, + NodeBlockFunction, + PIPELINE_BLOCKS, +) +from CPAC.pipeline.utils import name_fork, source_set +from CPAC.registration.registration import transform_derivative +from CPAC.resources.templates.lookup_table import lookup_identifier +from CPAC.utils.bids_utils import res_in_filename +from CPAC.utils.configuration.configuration import Configuration, Preconfiguration +from CPAC.utils.datasource import ( + calc_delta_te_and_asym_ratio, + check_for_s3, + check_func_scan, + create_anat_datasource, + create_fmap_datasource, + create_general_datasource, + gather_echo_times, + get_fmap_phasediff_metadata, + get_rest, + resolve_resolution, +) +from CPAC.utils.interfaces.datasink import DataSink +from CPAC.utils.interfaces.function import Function +from CPAC.utils.monitoring import ( + getLogger, + LOGTAIL, + UTLOGGER, + WARNING_FREESURFER_OFF_WITH_DATA, + WFLOGGER, +) +from CPAC.utils.outputs import Outputs +from CPAC.utils.typing import LIST_OF_LIST_OF_STR, PIPE_IDX +from CPAC.utils.utils import ( + check_prov_for_regtool, + create_id_string, + get_last_prov_entry, + get_scan_params, + read_json, + write_output_json, +) + +EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] +POOL_DICT = dict[str | tuple, "STRAT_DICT"] +STRAT_DICT = dict[str | tuple, "Resource"] + + +class DataPaths: + """Store subject-session specific data paths.""" + + def __init__( + self, *, data_paths: Optional[dict] = None, part_id: Optional[str] = "" + ) -> None: + """Initialize a `DataPaths` instance.""" + if not data_paths: + data_paths = {} + if part_id and "part_id" in data_paths and part_id != data_paths["part_id"]: + WFLOGGER.warning( + "both 'part_id' (%s) and data_paths['part_id'] (%s) provided. 
" + "Using '%s'.", + part_id, + data_paths["part_id"], + part_id, + ) + anat: dict[str, str] | str = data_paths.get("anat", {}) + if isinstance(anat, str): + anat = {"T1": anat} + self.anat: dict[str, str] = anat + self.creds_path: Optional[str] = data_paths.get("creds_path") + self.fmap: Optional[dict] = data_paths.get("fmap") + self.func: dict[str, dict[str, str | dict]] = data_paths.get("func", {}) + self.part_id: str = data_paths.get("subject_id", "") + self.site_id: str = data_paths.get("site_id", "") + self.ses_id: str = data_paths.get("unique_id", "") + self.unique_id: str = "_".join([self.part_id, self.ses_id]) + self.derivatives_dir: Optional[str] = data_paths.get("derivatives_dir") + + def __repr__(self) -> str: + """Return reproducible string representation of `DataPaths` instance.""" + return f"DataPaths(data_paths={self.as_dict()})" + + def __str__(self) -> str: + """Return string representation of a `DataPaths` instance.""" + return f"" + + def as_dict(self) -> dict: + """Return a `data_paths` dictionary. + + `data_paths` format:: + + {"anat": {"T1w": "{T1w path}", "T2w": "{T2w path}"}, + "creds_path": {None OR path to credentials CSV}, + "func": { + "{scan ID}": { + "scan": "{path to BOLD}", + "scan_parameters": {scan parameter dictionary}, + } + }, + "site_id": "site-ID", + "subject_id": "sub-01", + "unique_id": "ses-1", + "derivatives_dir": "{derivatives_dir path}",} + """ + return { + k: v + for k, v in { + key: getattr(self, key) + for key in [ + "anat", + "creds_path", + "func", + "site_id", + "subject_id", + "unique_id", + "derivatives_dir", + ] + }.items() + if v + } + + +@Function.sig_imports(["from typing import Optional"]) +def set_iterables( + scan: str, + mask_paths: Optional[list[str]] = None, + ts_paths: Optional[list[str]] = None, +) -> tuple[str, str, str]: + """Match scan with filepath to get filepath.""" + if mask_paths is None: + mask_paths = [] + if ts_paths is None: + ts_paths = [] + mask_path = [path for path in mask_paths if scan in path] + ts_path = [path for path in ts_paths if scan in path] + + return (scan, mask_path[0], ts_path[0]) + + +def strip_template(data_label: str) -> tuple[str, dict[str, str]]: + """Strip a template name from a data label to use as a :py:class:`Resource` key.""" + json = {} + # rename to template + for prefix in ["space-", "from-", "to-"]: + for bidstag in data_label.split("_"): + if bidstag.startswith(prefix): + _template_key, template_val = bidstag.split("-") + template_name, _template_desc = lookup_identifier(template_val) + if template_name: + json["Template"] = template_val + data_label = data_label.replace(template_val, "template") + elif bidstag.startswith("res-"): + _res_key, res_val = bidstag.split("-") + json["Resolution"] = res_val + data_label = data_label.replace(bidstag, "") + if data_label.find("__"): + data_label = data_label.replace("__", "_") + return data_label, json + + +class ResourceData(NamedTuple): + """Attribute and tuple access for `ResourceData`.""" + + node: pe.Node + """Resource :py:class:`~nipype.pipeline.engine.Node`.""" + out: str + """Output key.""" + + +class Resource: + """A single `Resource` and its methods.""" + + def __init__(self, data: tuple[pe.Node, str], json: dict) -> None: + """Initialize a `Resource`.""" + self.data = ResourceData(*data) + """Tuple of source :py:class:`~nipype.pipeline.engine.Node` and output key.""" + self._json: dict = json + """Metadata.""" + self._keys = {"data", "json"} + """Dictionary-style subscriptable keys.""" + + def keys(self) -> list[str]: + 
"""Return list of subscriptable keys.""" + return list(self._keys) + + def __contains__(self, item: Any) -> bool: + """Return ``True`` if `item` in :py:meth:`Resource.keys()`, ``False`` otherwise.""" + return item in self.keys() + + def __getitem__(self, name: str) -> Any: + """Provide legacy dict-style get access.""" + if name in self.keys(): + return getattr(self, name) + msg = f"Key '{name}' not set in {self}." + raise KeyError(msg) + + def __repr__(self) -> str: + """Return reproducible string for `Resource`.""" + positional = f"Resource(data={self.data}, json={self.json}" + kw = ", ".join( + f"{key}={getattr(self, key)}" + for key in self.keys() + if key not in ["data", "json"] + ) + return f"{positional}{kw})" + + def __setitem__(self, name: str, value: Any) -> None: + """Provide legacy dict-style set access for `Resource`.""" + setattr(self, name, value) + if name not in self.keys(): + self._keys.add(name) + + def __str__(self) -> str: + """Return string representation of `Resource`.""" + return f"{self.data[0]}" + + def get_json(self) -> dict[str | tuple, Any]: + """Return a deep copy of `Resource` JSON.""" + UTLOGGER.debug( + "%s is a deep copy of the attached JSON. Assign it to a variable before modifying or the changes will be ephemeral.", + self.__class__.__name__, + ) + return json.loads(json.dumps(self._json)) + + def set_json(self, value=dict) -> None: + """Update `Resource` JSON.""" + self._json.update(value) + + json = property(get_json, set_json, doc=get_json.__doc__) + + @property + def cpac_provenance(self) -> list: + """Get "CpacProvenance" of a `Resource`.""" + return self.json["CpacProvenance"] + + +class _Pool: + """All Resources.""" + + def __init__(self) -> None: + """Initialize a :py:class:`ResourcePool` or :py:class:`StratPool` .""" + self.ants_interp: str + self.cfg: Configuration + self.creds_paths: Optional[str] + self.data_paths: DataPaths + self.fsl_interp: str + self.func_reg: bool + self.fwhm: list[int] + self.info: dict = {} + self.logdir: Optional[str] + self.name: list[str] | str + self.num_ants_cores: int + self.num_cpus = int + self.part_id: str + self.pipe_list: list + self.ses_id: str + self.smoothing_bool: bool + self.smooth_opts: list[str] + self.regressors: dict | list + self.rpool: dict + self.run_smoothing: bool + self.run_zscoring: bool + self.unique_id: str + self.zscoring_bool: bool + self.wf: pe.Workflow + + def __repr__(self) -> str: + """Return reproducible `_Pool` string.""" + params = [ + f"{param}={getattr(self, param)}" + for param in ["rpool", "name", "cfg", "pipe_list"] + if getattr(self, param, None) + ] + return f'{self.__class__.__name__}({", ".join(params)})' + + def __str__(self) -> str: + """Return string representation of a `_Pool`.""" + if self.name: + return f"{self.__class__.__name__}({self.name}): {list(self.rpool)}" + return f"{self.__class__.__name__}: {list(self.rpool)}" + + @staticmethod + def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: + """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). + + NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation). + """ + if not isinstance(prov, list): + msg = ( + "\n[!] 
Developer info: the CpacProvenance " + f"entry for {prov} has to be a list.\n" + ) + raise TypeError(msg) + last_entry = get_last_prov_entry(prov) + resource = last_entry.split(":")[0] + return (resource, str(prov)) + + def check_rpool(self, resource: list[str] | str) -> bool: + """Check if a `resource` is present in the `_Pool`.""" + if not isinstance(resource, list): + resource = [resource] + for name in resource: + if name in self.rpool: + return True + return False + + def keys(self) -> KeysView: + """Return `rpool`'s keys.""" + return self.rpool.keys() + + def __contains__(self, key) -> bool: + """Return ``True`` if key in `_Pool`, ``False`` otherwise.""" + return key in self.keys() + + @staticmethod + def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: + """Return the last item in the provenance list. + + Each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD + data) has its own provenance list. the name of the resource, and + the node that produced it, is always the last item in the provenance + list, with the two separated by a colon (`:`) + """ + if not len(prov): + return None + if isinstance(prov[-1], list): + last_item_in_list = prov[-1][-1] + assert isinstance(last_item_in_list, str) + return last_item_in_list.split(":")[0] + if isinstance(prov[-1], str): + return prov[-1].split(":")[0] + return None + + def set_data( + self, + resource: str, + node: pe.Node | pe.Workflow, + output: str, + json_info: dict[str | tuple, Any], + pipe_idx: PIPE_IDX, + node_name: str, + fork: bool = False, + inject: bool = False, + ) -> None: + """Plug a :py:class:`Resource` into a `_Pool`.""" + json_info = json_info.copy() + cpac_prov: LIST_OF_LIST_OF_STR = [] + if "CpacProvenance" in json_info: + cpac_prov = json_info["CpacProvenance"] + current_prov_list = list(cpac_prov) + new_prov_list = list(cpac_prov) # <---- making a copy, it was already a list + if not inject: + new_prov_list.append(f"{resource}:{node_name}") + try: + _resource, new_pipe_idx = self.generate_prov_string(new_prov_list) + except IndexError: + msg = ( + f"\n\nThe set_data() call for {resource} has no " + "provenance information and should not be an " + "injection." 
+ ) + raise IndexError(msg) + if not json_info: + json_info = { + "RawSources": [ + resource # <---- this will be repopulated to the full file path at the end of the pipeline building, in gather_pipes() + ] + } + json_info["CpacProvenance"] = new_prov_list + + if resource not in self.keys(): + self.rpool[resource] = {} + elif not fork: # <--- in the event of multiple strategies/options, this will run for every option; just keep in mind + search = False + if self.get_resource_from_prov(current_prov_list) == resource: + # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION + pipe_idx = self.generate_prov_string(current_prov_list)[1] + if pipe_idx not in self.rpool[resource].keys(): + search = True + else: + search = True + if search: + for idx in current_prov_list: + if self.get_resource_from_prov(idx) == resource: + if isinstance(idx, list): + # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION + pipe_idx = self.generate_prov_string(idx)[1] + elif isinstance(idx, str): + pipe_idx = idx + break + if pipe_idx in self.rpool[resource].keys(): + # in case the resource name is now new, and not the original + # remove old keys so we don't end up with a new strat for every new node unit (unless we fork) + del self.rpool[resource][pipe_idx] + if new_pipe_idx not in self.rpool[resource]: + self.rpool[resource][new_pipe_idx] = Resource( + data=ResourceData(node, output), json=json_info + ) + if new_pipe_idx not in self.pipe_list: + self.pipe_list.append(new_pipe_idx) + + def get( + self, + resource: LIST_OF_LIST_OF_STR | str | list[str], + pipe_idx: Optional[PIPE_IDX], + report_fetched: bool, + optional: bool, + ) -> ( + Optional[Resource | STRAT_DICT | dict] + | tuple[Optional[Resource | STRAT_DICT], Optional[str]] + ): + """Return a dictionary of strats or a single :py:class:`Resource` .""" + if not isinstance(resource, list): + resource = [resource] + # if a list of potential inputs are given, pick the first one found + for label in resource: + if label in self.keys(): + _found = self.rpool[label] + if pipe_idx: + _found = _found[pipe_idx] + if report_fetched: + return _found, label + return _found + if optional: + if report_fetched: + return (None, None) + return None + msg = ( + "\n\n[!] C-PAC says: None of the listed resources are in " + f"the resource pool:\n\n {resource}\n\nOptions:\n- You " + "can enable a node block earlier in the pipeline which " + "produces these resources. 
Check the 'outputs:' field in " + "a node block's documentation.\n- You can directly " + "provide this required data by pulling it from another " + "BIDS directory using 'source_outputs_dir:' in the " + "pipeline configuration, or by placing it directly in " + "your C-PAC output directory.\n- If you have done these, " + "and you still get this message, please let us know " + "through any of our support channels at: " + "https://fcp-indi.github.io/\n" + ) + raise LookupError(msg) + + +class ResourcePool(_Pool): + """A pool of :py:class:`Resource` s.""" + + def __init__( + self, + name: str = "", + cfg: Optional[Configuration] = None, + pipe_list: Optional[list] = None, + *, + data_paths: Optional[DataPaths | dict] = None, + part_id: Optional[str] = None, + pipeline_name: str = "", + wf: Optional[pe.Workflow] = None, + ) -> None: + """Initialize a `ResourcePool`.""" + self.name = name + super().__init__() + if isinstance(data_paths, dict): + data_paths = DataPaths(data_paths=data_paths) + elif not data_paths: + data_paths = DataPaths(part_id=part_id) + self.data_paths = data_paths + # pass-through for convenient access + self.creds_path = self.data_paths.creds_path + self.part_id = self.data_paths.part_id + self.ses_id = self.data_paths.ses_id + self.unique_id = self.data_paths.unique_id + self.rpool: POOL_DICT = {} + + if not pipe_list: + self.pipe_list = [] + else: + self.pipe_list = pipe_list + + if cfg: + self.cfg = cfg + else: + self.cfg = Preconfiguration("blank") + + self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) + self.num_cpus = self._config_lookup( + ["pipeline_setup", "system_config", "max_cores_per_participant"] + ) + self.num_ants_cores = self._config_lookup( + ["pipeline_setup", "system_config", "num_ants_threads"] + ) + + self.ants_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "ANTs_pipelines", + "interpolation", + ] + ) + self.fsl_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "FNIRT_pipelines", + "interpolation", + ] + ) + self.func_reg = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "run", + ] + ) + + self.run_smoothing = "smoothed" in self._config_lookup( + ["post_processing", "spatial_smoothing", "output"], list + ) + self.smoothing_bool = self._config_lookup( + ["post_processing", "spatial_smoothing", "run"] + ) + self.run_zscoring = "z-scored" in self._config_lookup( + ["post_processing", "z-scoring", "output"], list + ) + self.zscoring_bool = self._config_lookup( + ["post_processing", "z-scoring", "run"] + ) + self.fwhm = self._config_lookup( + ["post_processing", "spatial_smoothing", "fwhm"] + ) + self.smooth_opts = self._config_lookup( + ["post_processing", "spatial_smoothing", "smoothing_method"] + ) + + if wf: + self.wf = wf + else: + self.initialize_nipype_wf(pipeline_name) + + self.xfm = [ + "alff", + "desc-sm_alff", + "desc-zstd_alff", + "desc-sm-zstd_alff", + "falff", + "desc-sm_falff", + "desc-zstd_falff", + "desc-sm-zstd_falff", + "reho", + "desc-sm_reho", + "desc-zstd_reho", + "desc-sm-zstd_reho", + ] + ingress_derivatives = False + try: + if self.data_paths.derivatives_dir and self._config_lookup( + ["pipeline_setup", "outdir_ingress", "run"], bool + ): + ingress_derivatives = True + except (AttributeError, KeyError, TypeError): + pass + if ingress_derivatives: + self.ingress_output_dir() + 
else: + self.ingress_raw_anat_data() + if data_paths.func: + self.ingress_raw_func_data() + self.ingress_pipeconfig_paths() + + def back_propogate_template_name( + self, resource_idx: str, json_info: dict, id_string: pe.Node + ) -> None: + """Find and apply the template name from a :py:class:`Resource` 's provenance.""" + if "template" in resource_idx and self.check_rpool("derivatives-dir"): + if self.check_rpool("template"): + node, out = self.get_data("template") + self.wf.connect(node, out, id_string, "template_desc") + elif "Template" in json_info: + id_string.inputs.template_desc = json_info["Template"] + elif ( + "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1 + ): + for resource in source_set(json_info["CpacProvenance"]): + source, value = resource.split(":", 1) + if value.startswith("template_") and source != "FSL-AFNI-bold-ref": + # 'FSL-AFNI-bold-ref' is currently allowed to be in + # a different space, so don't use it as the space for + # descendents + try: + ancestors = self.rpool.get(source) + assert ancestors is not None + anscestor_json = next(iter(ancestors.items()))[1].json + if "Description" in anscestor_json: + id_string.inputs.template_desc = anscestor_json[ + "Description" + ] + return + except (IndexError, KeyError): + pass + return + + def gather_pipes( # noqa: PLR0915 + self, + wf: pe.Workflow, + cfg: Configuration, + all_types: bool = False, + add_excl: Optional[list[str]] = None, + ) -> None: + """Gather pipes including naming, postproc, and expected outputs.""" + excl: list[str] = [] + # substring_excl: list[str] = [] + outputs_logger = getLogger(f"{self.part_id}_expectedOutputs") + expected_outputs = ExpectedOutputs() + + if add_excl: + excl += add_excl + + if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]: # type: ignore [attr-defined] + excl += Outputs.native_nonsmooth + excl += Outputs.template_nonsmooth + + if "raw" not in cfg.post_processing["z-scoring"]["output"]: # type: ignore [attr-defined] + excl += Outputs.native_raw + excl += Outputs.template_raw + + if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]: # type: ignore [attr-defined] + # substring_excl.append(['bold']) + excl += Outputs.debugging + + for resource in self.keys(): + if resource in excl or resource not in Outputs.any: + continue + + # drop = False + # for substring_list in substring_excl: + # bool_list = [] + # for substring in substring_list: + # if substring in resource: + # bool_list.append(True) + # else: + # bool_list.append(False) + # for item in bool_list: + # if not item: + # break + # else: + # drop = True + # if drop: + # break + # if drop: + # continue + + subdir = "other" + if resource in Outputs.anat: + subdir = "anat" + # TODO: get acq- etc. + elif resource in Outputs.func: + subdir = "func" + # TODO: other stuff like acq- etc. 
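    # Reviewer note: the loop below assembles each output destination as
    #   <output_dir>/pipeline_<pipeline_name>/<part_id>/<ses_id>/<subdir>/<unique_id>_<resource-derived filename>
    # where <subdir> is the "anat"/"func"/"other" value chosen above.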
+ + for pipe_idx in self.rpool[resource]: + unique_id = self.unique_id + part_id = self.part_id + ses_id = self.ses_id + + if "ses-" not in ses_id: + ses_id = f"ses-{ses_id}" + + out_dir = cfg.pipeline_setup["output_directory"]["path"] # type: ignore [attr-defined] + pipe_name = cfg.pipeline_setup["pipeline_name"] # type: ignore [attr-defined] + container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id) + filename = f"{unique_id}_{res_in_filename(self.cfg, resource)}" + + out_path = os.path.join(out_dir, container, subdir, filename) + + out_dct = { + "unique_id": unique_id, + "out_dir": out_dir, + "container": container, + "subdir": subdir, + "filename": filename, + "out_path": out_path, + } + self.rpool[resource][pipe_idx]["out"] = out_dct + + # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here. + # TODO: can do the pipeline_description.json variants here too! + + num_variant: Optional[int | str] = 0 + if len(self.rpool[resource]) == 1: + num_variant = "" + unlabelled = self._get_unlabelled(resource) + for pipe_idx in self.rpool[resource]: + pipe_x = self._get_pipe_number(pipe_idx) + json_info = self.rpool[resource][pipe_idx]["json"] + out_dct = self.rpool[resource][pipe_idx]["out"] + + try: + if unlabelled: + assert isinstance(num_variant, int) + num_variant += 1 + except TypeError: + pass + + try: + del json_info["subjson"] + except KeyError: + pass + + if out_dct["subdir"] == "other" and not all_types: + continue + + unique_id = out_dct["unique_id"] + resource_idx = resource + + if isinstance(num_variant, int): + resource_idx, out_dct = name_fork( + resource_idx, cfg, json_info, out_dct + ) + if unlabelled: + if "desc-" in out_dct["filename"]: + for key in out_dct["filename"].split("_")[::-1]: + # final `desc` entity + if key.startswith("desc-"): + out_dct["filename"] = out_dct["filename"].replace( + key, f"{key}-{num_variant}" + ) + resource_idx = resource_idx.replace( + key, f"{key}-{num_variant}" + ) + break + else: + suff = resource.split("_")[-1] + newdesc_suff = f"desc-{num_variant}_{suff}" + resource_idx = resource_idx.replace(suff, newdesc_suff) + id_string = pe.Node( + Function( + input_names=[ + "cfg", + "unique_id", + "resource", + "scan_id", + "template_desc", + "atlas_id", + "fwhm", + "subdir", + "extension", + ], + output_names=["out_filename"], + function=create_id_string, + ), + name=f"id_string_{resource_idx}_{pipe_x}", + ) + id_string.inputs.cfg = self.cfg + id_string.inputs.unique_id = unique_id + id_string.inputs.resource = resource_idx + id_string.inputs.subdir = out_dct["subdir"] + + # grab the iterable scan ID + if out_dct["subdir"] == "func": + node, out = self.rpool["scan"]["['scan:func_ingress']"]["data"] + wf.connect(node, out, id_string, "scan_id") + + self.back_propogate_template_name(resource_idx, json_info, id_string) + # grab the FWHM if smoothed + for tag in resource.split("_"): + if "desc-" in tag and "-sm" in tag: + fwhm_idx = str(pipe_idx).replace(f"{resource}:", "fwhm:") + try: + node, out = self.rpool["fwhm"][fwhm_idx]["data"] + wf.connect(node, out, id_string, "fwhm") + except KeyError: + # smoothing was not done for this resource in the + # engine.py smoothing + pass + break + atlas_suffixes = ["timeseries", "correlations", "statmap"] + # grab the iterable atlas ID + atlas_id = None + if not resource.endswith("desc-confounds_timeseries"): + if resource.split("_")[-1] in atlas_suffixes: + atlas_idx = str(pipe_idx).replace(resource, "atlas_name") + # need the single quote and 
the colon inside the double + # quotes - it's the encoded pipe_idx + # atlas_idx = new_idx.replace(f"'{temp_rsc}:", + # "'atlas_name:") + if atlas_idx in self.rpool["atlas_name"]: + node, out = self.rpool["atlas_name"][atlas_idx]["data"] + wf.connect(node, out, id_string, "atlas_id") + elif "atlas-" in resource: + for tag in resource.split("_"): + if "atlas-" in tag: + atlas_id = tag.replace("atlas-", "") + id_string.inputs.atlas_id = atlas_id + else: + WFLOGGER.warning( + "\n[!] No atlas ID found for %s.\n", out_dct["filename"] + ) + nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}") + nii_name.inputs.keep_ext = True + + if resource in Outputs.ciftis: + nii_name.inputs.keep_ext = False + id_string.inputs.extension = Outputs.ciftis[resource] + else: + nii_name.inputs.keep_ext = True + + if resource in Outputs.giftis: + nii_name.inputs.keep_ext = False + id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii" + + else: + nii_name.inputs.keep_ext = True + + wf.connect(id_string, "out_filename", nii_name, "format_string") + + node, out = self.rpool[resource][pipe_idx]["data"] + try: + wf.connect(node, out, nii_name, "in_file") + except OSError as os_error: + WFLOGGER.warning(os_error) + continue + + write_json = pe.Node( + Function( + input_names=["json_data", "filename"], + output_names=["json_file"], + function=write_output_json, + ), + name=f"json_{resource_idx}_{pipe_x}", + ) + write_json.inputs.json_data = json_info + + wf.connect(id_string, "out_filename", write_json, "filename") + ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}") + ds.inputs.parameterization = False + ds.inputs.base_directory = out_dct["out_dir"] + ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined] + "s3_encryption" + ] + ds.inputs.container = out_dct["container"] + + if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: # type: ignore[attr-defined] + ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined] + "aws_output_bucket_credentials" + ] + expected_outputs += ( + out_dct["subdir"], + create_id_string( + self.cfg, + unique_id, + resource_idx, + template_desc=id_string.inputs.template_desc, + atlas_id=atlas_id, + subdir=out_dct["subdir"], + ), + ) + wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data') + wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json') + outputs_logger.info(expected_outputs) + + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + report_fetched: Literal[False] = False, + *, + optional: Literal[True], + ) -> Optional[STRAT_DICT]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + *, + optional: Literal[True], + ) -> Optional[Resource]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + *, + report_fetched: Literal[True], + optional: Literal[True], + ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + optional: Literal[True], + ) -> tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> STRAT_DICT: ... 
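    # Reviewer note on these overloads: _Pool.get() returns the whole strat
    # dictionary ({pipe_idx: Resource}) when pipe_idx is None, a single Resource
    # when pipe_idx is given, a (result, matched_label) tuple when
    # report_fetched=True, and None (or (None, None)) instead of raising
    # LookupError when optional=True and nothing matches.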
+ @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> Resource: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + *, + report_fetched: Literal[True], + optional: bool = False, + ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + optional: Literal[False] = False, + ) -> tuple[Resource, str]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ) -> ( + Optional[Resource | STRAT_DICT] + | tuple[Optional[Resource | STRAT_DICT], Optional[str]] + ): ... + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ): + """Return a dictionary of strats. + + Inside those are dictionaries like ``{'data': (node, out), 'json': info}``. + """ + return super().get(resource, pipe_idx, report_fetched, optional) + + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: None = None, + report_fetched: bool = False, + quick_single: bool = False, + ) -> ResourceData: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + quick_single: Literal[False] = False, + ) -> tuple[ResourceData, str]: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + quick_single: bool = False, + ) -> ResourceData: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX], + report_fetched: bool, + quick_single: Literal[True], + ) -> ResourceData: ... + def get_data( + self, + resource, + pipe_idx=None, + report_fetched=False, + quick_single=False, + ): + """Get :py:class:`ResourceData` from `ResourcePool`.""" + _resource = self.get(resource, pipe_idx=pipe_idx, report_fetched=report_fetched) + if report_fetched: + if pipe_idx: + connect, fetched = _resource + assert isinstance(connect, Resource) and isinstance(fetched, str) + return connect.data, fetched + if quick_single or len(resource) == 1: + assert isinstance(_resource, dict) + for value in _resource.values(): + return value.data + assert isinstance(_resource, Resource) + return _resource.data + + def get_json(self, resource: str, strat: str | tuple) -> dict: + """Get JSON metadata from a :py:class:`Resource` in a strategy.""" + return self.get(resource, pipe_idx=strat).json + + def get_json_info(self, resource: str, key: str) -> Any: + """Get a metadata value from a matching from any strategy.""" + # TODO: key checks + for val in self.rpool[resource].values(): + if key in val.json: + return val.json[key] + msg = f"{key} not found in any strategy for {resource} in {self}." 
+ raise KeyError(msg) + + @staticmethod + def get_raw_label(resource: str) -> str: + """Remove ``desc-*`` label.""" + for tag in resource.split("_"): + if "desc-" in tag: + resource = resource.replace(f"{tag}_", "") + break + return resource + + def get_strats( # noqa: PLR0912,PLR0915 + self, resources: NODEBLOCK_INPUTS, debug: bool = False + ) -> dict[str | tuple, "StratPool"]: + """Get a dictionary of :py:class:`StratPool` s.""" + # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS + # TODO: (and it doesn't have to be) + import itertools + + linked_resources = [] + resource_list: list[str | list[str]] = [] + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("\nresources: %s", resources) + for resource in resources: + # grab the linked-input tuples + if isinstance(resource, tuple): + linked: list[str] = [] + for label in list(resource): + rp_dct, fetched_resource = self.get( + label, report_fetched=True, optional=True + ) + if not rp_dct: + continue + assert fetched_resource is not None + linked.append(fetched_resource) + resource_list += linked + if len(linked) < 2: # noqa: PLR2004 + continue + linked_resources.append(linked) + else: + resource_list.append(resource) + + total_pool = [] + variant_pool: dict = {} + len_inputs = len(resource_list) + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("linked_resources: %s", linked_resources) + verbose_logger.debug("resource_list: %s", resource_list) + for resource in resource_list: + ( + rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath + fetched_resource, + ) = self.get( + resource, + report_fetched=True, + optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be + ) + if not rp_dct: + len_inputs -= 1 + continue + assert isinstance(rp_dct, dict) and fetched_resource is not None + sub_pool = [] + if debug: + verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) + for strat in rp_dct.keys(): + json_info = self.get_json(fetched_resource, strat) + cpac_prov = json_info["CpacProvenance"] + sub_pool.append(cpac_prov) + if fetched_resource not in variant_pool: + variant_pool[fetched_resource] = [] + if "CpacVariant" in json_info: + for key, val in json_info["CpacVariant"].items(): + if val not in variant_pool[fetched_resource]: + variant_pool[fetched_resource] += val + variant_pool[fetched_resource].append(f"NO-{val[0]}") + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool) + total_pool.append(sub_pool) + + if not total_pool: + raise LookupError( + "\n\n[!] C-PAC says: None of the listed " + "resources in the node block being connected " + "exist in the resource pool.\n\nResources:\n" + "%s\n\n" % resource_list + ) + + # TODO: right now total_pool is: + # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]], + # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]] + + # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.] 
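    # Reviewer note: itertools.product(*total_pool) below forms every combination
    # of one strategy per input, e.g. two inputs with 2 and 3 strategies yield
    # 2 x 3 = 6 candidate tuples; the string-based de-duplication and the
    # linked-resource checks that follow then prune that set.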
+ # TODO: and the actual resource is encoded in the tag: of the last item, every time! + # keying the strategies to the resources, inverting it + if len_inputs > 1: + strats = itertools.product(*total_pool) + + # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations. + # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}. + # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items. + new_strats: dict[str | tuple, StratPool] = {} + + # get rid of duplicates - TODO: refactor .product + strat_str_list = [] + strat_list_list = [] + for strat_tuple in strats: + strat_list = list(deepcopy(strat_tuple)) + strat_str = str(strat_list) + if strat_str not in strat_str_list: + strat_str_list.append(strat_str) + strat_list_list.append(strat_list) + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list)) + for strat_list in strat_list_list: + json_dct = {} + for strat in strat_list: + # strat is a prov list for a single resource/input + prov_resource, strat_idx = self.generate_prov_string(strat) + strat_json = self.get_json(prov_resource, strat=strat_idx) + json_dct[prov_resource] = strat_json + + drop = False + if linked_resources: + for linked in linked_resources: # <--- 'linked' is each tuple + if drop: + break + for xlabel in linked: + if drop or xlabel is None: + break + xjson = json.loads(json.dumps(json_dct[xlabel])) + for ylabel in linked: + if xlabel == ylabel or ylabel is None: + continue + yjson = json.loads(json.dumps(json_dct[ylabel])) + + if "CpacVariant" not in xjson: + xjson["CpacVariant"] = {} + if "CpacVariant" not in yjson: + yjson["CpacVariant"] = {} + + current_strat = [] + for val in xjson["CpacVariant"].values(): + if isinstance(val, list): + current_strat.append(val[0]) + else: + current_strat.append(val) + current_spread = list(set(variant_pool[xlabel])) + for spread_label in current_spread: + if "NO-" in spread_label: + continue + if spread_label not in current_strat: + current_strat.append(f"NO-{spread_label}") + + other_strat = [] + for val in yjson["CpacVariant"].values(): + if isinstance(val, list): + other_strat.append(val[0]) + else: + other_strat.append(val) + other_spread = list(set(variant_pool[ylabel])) + for spread_label in other_spread: + if "NO-" in spread_label: + continue + if spread_label not in other_strat: + other_strat.append(f"NO-{spread_label}") + + for variant in current_spread: + in_current_strat = False + in_other_strat = False + in_other_spread = False + + if variant is None: + in_current_strat = True + if None in other_spread: + in_other_strat = True + if variant in current_strat: + in_current_strat = True + if variant in other_strat: + in_other_strat = True + if variant in other_spread: + in_other_spread = True + + if not in_other_strat: + if in_other_spread: + if in_current_strat: + drop = True + break + + if in_other_strat: + if in_other_spread: + if not in_current_strat: + drop = True + break + if drop: + break + if drop: + continue + + # make the merged strat label from the multiple inputs + # strat_list is actually the merged CpacProvenance lists + pipe_idx = str(strat_list) + new_strats[pipe_idx] = StratPool(name=pipe_idx, cfg=self.cfg) + # new_strats is A DICTIONARY OF StratPool OBJECTS! 
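    # Reviewer note: each pipe_idx key is the stringified merged CpacProvenance,
    # e.g. "[['T1w:anat_ingress', 'desc-preproc_T1w:anatomical_init'], [...]]",
    # so every distinct combination of input strategies gets its own StratPool.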
+ new_strats[pipe_idx].json = {"CpacProvenance": strat_list} + + # now just invert resource:strat to strat:resource for each resource:strat + for cpac_prov in strat_list: + resource, strat = self.generate_prov_string(cpac_prov) + strat_resource = self.rpool[resource][strat] + # remember, `strat_resource` is a Resource. + new_strats[pipe_idx].rpool[resource] = strat_resource + # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. + self.pipe_list.append(pipe_idx) + if "CpacVariant" in strat_resource["json"]: + if "CpacVariant" not in new_strats[pipe_idx]._json: + new_strats[pipe_idx]._json["CpacVariant"] = {} + for younger_resource, variant_list in ( + new_strats[pipe_idx]._json["CpacVariant"].items() + ): + if ( + younger_resource + not in new_strats[pipe_idx]._json["CpacVariant"] + ): + new_strats[pipe_idx]._json["CpacVariant"][ + younger_resource + ] = variant_list + # preserve each input's JSON info also + new_strats[pipe_idx].preserve_json_info(resource, strat_resource) + else: + new_strats = {} + for resource_strat_list in total_pool: + # total_pool will have only one list of strats, for the one input + for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs + resource, pipe_idx = self.generate_prov_string(cpac_prov) + strat_resource = self.rpool[resource][pipe_idx] + # remember, `strat_resource` is a Resource. + new_strats[pipe_idx] = StratPool( + rpool={resource: strat_resource}, name=pipe_idx, cfg=self.cfg + ) # <----- again, new_strats is A DICTIONARY OF StratPool OBJECTS! + new_strats[pipe_idx].json = strat_resource.json + new_strats[pipe_idx].json["subjson"] = {} + new_strats[pipe_idx].json["CpacProvenance"] = cpac_prov + # preserve each input's JSON info also + new_strats[pipe_idx].preserve_json_info(resource, strat_resource) + return new_strats + + def initialize_nipype_wf(self, name: str = "") -> None: + """Initialize a new nipype :py:class:`~nipype.pipeline.engine.Workflow` .""" + if name: + name = f"_{name}" + workflow_name = f"cpac{name}_{self.unique_id}" + self.wf = pe.Workflow(name=workflow_name) + self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] # type: ignore[attr-defined] + self.wf.config["execution"] = { + "hash_method": "timestamp", + "crashdump_dir": os.path.abspath( + self.cfg.pipeline_setup["log_directory"]["path"] # type: ignore[attr-defined] + ), + } + + def ingress_freesurfer(self) -> None: + """Ingress FreeSurfer data.""" + try: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + self.part_id, + ) + except KeyError: + WFLOGGER.warning("No FreeSurfer data present.") + return + + # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) + if not os.path.exists(fs_path): + if "sub" in self.part_id: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + self.part_id.replace("sub-", ""), + ) + else: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + ("sub-" + self.part_id), + ) + + # patch for flo-specific data + if not os.path.exists(fs_path): + subj_ses = f"{self.part_id}-{self.ses_id}" + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + subj_ses, + ) + if not os.path.exists(fs_path): + WFLOGGER.info( + "No FreeSurfer data found for subject %s", self.part_id + ) + return + + # Check for 
double nested subj names + if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): + fs_path = os.path.join(fs_path, self.part_id) + + fs_ingress = create_general_datasource("gather_freesurfer_dir") + fs_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=fs_path, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + "freesurfer-subject-dir", + fs_ingress, + "outputspec.data", + {}, + "", + "freesurfer_config_ingress", + ) + + recon_outs = { + "pipeline-fs_raw-average": "mri/rawavg.mgz", + "pipeline-fs_subcortical-seg": "mri/aseg.mgz", + "pipeline-fs_brainmask": "mri/brainmask.mgz", + "pipeline-fs_wmparc": "mri/wmparc.mgz", + "pipeline-fs_T1": "mri/T1.mgz", + "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", + "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", + "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", + "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", + "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", + "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", + "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", + "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", + "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", + "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", + "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", + "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", + "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", + "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", + "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", + "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", + "pipeline-fs_xfm": "mri/transforms/talairach.lta", + } + + for key, outfile in recon_outs.items(): + fullpath = os.path.join(fs_path, outfile) + if os.path.exists(fullpath): + fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") + fs_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=fullpath, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" + ) + else: + WFLOGGER.warning("\n[!] 
Path does not exist for %s.\n", fullpath) + + return + + def ingress_output_dir(self) -> None: + """Ingress an output directory into a `ResourcePool`.""" + dir_path = self.data_paths.derivatives_dir + assert dir_path is not None + WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) + + anat = os.path.join(dir_path, "anat") + func = os.path.join(dir_path, "func") + + outdir_anat: list[str] = [] + outdir_func: list[str] = [] + func_paths: dict = {} + func_dict: dict = {} + func_key = "" + + for subdir in [anat, func]: + if os.path.isdir(subdir): + for filename in os.listdir(subdir): + for ext in EXTS: + if ext in filename: + if subdir == anat: + outdir_anat.append(os.path.join(subdir, filename)) + else: + outdir_func.append(os.path.join(subdir, filename)) + + # Add derivatives directory to rpool + ingress = create_general_datasource("gather_derivatives_dir") + ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=dir_path, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + "derivatives-dir", + ingress, + "outputspec.data", + {}, + "", + "outdir_config_ingress", + ) + + for subdirs in [outdir_anat, outdir_func]: + for filepath in subdirs: + filename = str(filepath) + for ext in EXTS: + filename = filename.split("/")[-1].replace(ext, "") + + data_label = filename.split(self.unique_id)[1].lstrip("_") + + if len(filename) == len(data_label): + msg = ( + "\n\n[!] Possibly wrong participant or " + "session in this directory?\n\n" + f"Filepath: {filepath}\n\n" + ) + raise Exception(msg) + + bidstag = "" + for tag in data_label.split("_"): + for prefix in ["task-", "run-", "acq-", "rec"]: + if tag.startswith(prefix): + bidstag += f"{tag}_" + data_label = data_label.replace(f"{tag}_", "") + data_label, json = strip_template(data_label) + + json_info, pipe_idx, node_name, data_label = self.json_outdir_ingress( + filepath, data_label, json + ) + + if ( + "template" in data_label + and not json_info["Template"] + == self.cfg.pipeline_setup["outdir_ingress"]["Template"] # type: ignore[attr-defined] + ): + continue + # Rename confounds to avoid confusion in nuisance regression + if data_label.endswith("desc-confounds_timeseries"): + data_label = "pipeline-ingress_desc-confounds_timeseries" + + if len(bidstag) > 1: + # Remove tail symbol + bidstag = bidstag[:-1] + if bidstag.startswith("task-"): + bidstag = bidstag.replace("task-", "") + + # Rename bold mask for CPAC naming convention + # and to avoid collision with anat brain mask + if data_label.endswith("desc-brain_mask") and filepath in outdir_func: + data_label = data_label.replace("brain_mask", "bold_mask") + + try: + pipe_x = self._get_pipe_number(pipe_idx) + except ValueError: + pipe_x = len(self.pipe_list) + if filepath in outdir_anat: + ingress = create_general_datasource( + f"gather_anat_outdir_{data_label!s}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=filepath, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + data_label, + ingress, + "outputspec.data", + json_info, + pipe_idx, + node_name=f"outdir_{data_label}_ingress", + inject=True, + ) + else: + if data_label.endswith("desc-preproc_bold"): + func_key = data_label + func_dict[bidstag] = {} + func_dict[bidstag]["scan"] = str(filepath) + func_dict[bidstag]["scan_parameters"] = json_info + func_dict[bidstag]["pipe_idx"] = pipe_idx + if 
data_label.endswith("desc-brain_mask"): + data_label = data_label.replace("brain_mask", "bold_mask") + try: + func_paths[data_label].append(filepath) + except (AttributeError, KeyError, TypeError): + func_paths[data_label] = [] + func_paths[data_label].append(filepath) + + if func_dict: + self.func_outdir_ingress( + func_dict, + func_key, + func_paths, + ) + + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] + self.ingress_freesurfer() + + def ingress_func_metadata( + self, + num_strat=None, + ) -> tuple[bool, bool, list[str]]: + """Ingress metadata for functional scans.""" + name_suffix = "" + for suffix_part in (self.unique_id, num_strat): + if suffix_part is not None: + name_suffix += f"_{suffix_part}" + # Grab field maps + diff = False + blip = False + fmap_rp_list = [] + fmap_TE_list = [] + if self.data_paths.fmap: + second = False + for orig_key in self.data_paths.fmap: + gather_fmap = create_fmap_datasource( + self.data_paths.fmap, f"fmap_gather_{orig_key}_{self.part_id}" + ) + gather_fmap.inputs.inputnode.set( + subject=self.part_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + gather_fmap.inputs.inputnode.scan = orig_key + + key = orig_key + if "epi" in key and not second: + key = "epi-1" + second = True + elif "epi" in key and second: + key = "epi-2" + + self.set_data( + key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress" + ) + self.set_data( + f"{key}-scan-params", + gather_fmap, + "outputspec.scan_params", + {}, + "", + "fmap_params_ingress", + ) + + fmap_rp_list.append(key) + + get_fmap_metadata_imports = ["import json"] + get_fmap_metadata = pe.Node( + Function( + input_names=["data_config_scan_params"], + output_names=[ + "dwell_time", + "pe_direction", + "total_readout", + "echo_time", + "echo_time_one", + "echo_time_two", + ], + function=get_fmap_phasediff_metadata, + imports=get_fmap_metadata_imports, + ), + name=f"{key}_get_metadata{name_suffix}", + ) + + self.wf.connect( + gather_fmap, + "outputspec.scan_params", + get_fmap_metadata, + "data_config_scan_params", + ) + + if "phase" in key: + # leave it open to all three options, in case there is a + # phasediff image with either a single EchoTime field (which + # usually matches one of the magnitude EchoTimes), OR + # a phasediff with an EchoTime1 and EchoTime2 + + # at least one of these rpool keys will have a None value, + # which will be sorted out in gather_echo_times below + self.set_data( + f"{key}-TE", + get_fmap_metadata, + "echo_time", + {}, + "", + "fmap_TE_ingress", + ) + fmap_TE_list.append(f"{key}-TE") + + self.set_data( + f"{key}-TE1", + get_fmap_metadata, + "echo_time_one", + {}, + "", + "fmap_TE1_ingress", + ) + fmap_TE_list.append(f"{key}-TE1") + + self.set_data( + f"{key}-TE2", + get_fmap_metadata, + "echo_time_two", + {}, + "", + "fmap_TE2_ingress", + ) + fmap_TE_list.append(f"{key}-TE2") + + elif "magnitude" in key: + self.set_data( + f"{key}-TE", + get_fmap_metadata, + "echo_time", + {}, + "", + "fmap_TE_ingress", + ) + fmap_TE_list.append(f"{key}-TE") + + self.set_data( + f"{key}-dwell", + get_fmap_metadata, + "dwell_time", + {}, + "", + "fmap_dwell_ingress", + ) + self.set_data( + f"{key}-pedir", + get_fmap_metadata, + "pe_direction", + {}, + "", + "fmap_pedir_ingress", + ) + self.set_data( + f"{key}-total-readout", + get_fmap_metadata, + "total_readout", + {}, + "", + "fmap_readout_ingress", + ) + + if "phase" in key or "mag" in key: + diff = True + + if 
re.match("epi_[AP]{2}", orig_key): + blip = True + + if diff: + calc_delta_ratio = pe.Node( + Function( + input_names=["effective_echo_spacing", "echo_times"], + output_names=["deltaTE", "ees_asym_ratio"], + function=calc_delta_te_and_asym_ratio, + imports=["from typing import Optional"], + ), + name=f"diff_distcor_calc_delta{name_suffix}", + ) + + gather_echoes = pe.Node( + Function( + input_names=[ + "echotime_1", + "echotime_2", + "echotime_3", + "echotime_4", + ], + output_names=["echotime_list"], + function=gather_echo_times, + ), + name="fugue_gather_echo_times", + ) + + for idx, fmap_file in enumerate(fmap_TE_list, start=1): + try: + node, out_file = self.get_data( + fmap_file, f"['{fmap_file}:fmap_TE_ingress']" + ) + self.wf.connect( + node, out_file, gather_echoes, f"echotime_{idx}" + ) + except KeyError: + pass + + self.wf.connect( + gather_echoes, "echotime_list", calc_delta_ratio, "echo_times" + ) + + # Add in nodes to get parameters from configuration file + # a node which checks if scan_parameters are present for each scan + scan_params = pe.Node( + Function( + input_names=[ + "data_config_scan_params", + "subject_id", + "scan", + "pipeconfig_tr", + "pipeconfig_tpattern", + "pipeconfig_start_indx", + "pipeconfig_stop_indx", + ], + output_names=[ + "tr", + "tpattern", + "template", + "ref_slice", + "start_indx", + "stop_indx", + "pe_direction", + "effective_echo_spacing", + ], + function=get_scan_params, + imports=["from CPAC.utils.utils import check, try_fetch_parameter"], + ), + name=f"bold_scan_params_{self.part_id}{name_suffix}", + ) + scan_params.inputs.subject_id = self.part_id + scan_params.inputs.set( + pipeconfig_start_indx=self.cfg.functional_preproc["truncation"]["start_tr"], # type: ignore[attr-defined] + pipeconfig_stop_indx=self.cfg.functional_preproc["truncation"]["stop_tr"], # type: ignore[attr-defined] + ) + + node, out = self.get_data("scan", "['scan:func_ingress']") + self.wf.connect(node, out, scan_params, "scan") + + # Workaround for extracting metadata with ingress + if self.check_rpool("derivatives-dir"): + selectrest_json = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="selectrest_json", + ) + selectrest_json.inputs.rest_dict = self.data_paths.as_dict() + selectrest_json.inputs.resource = "scan_parameters" + self.wf.connect(node, out, selectrest_json, "scan") + self.wf.connect( + selectrest_json, "file_path", scan_params, "data_config_scan_params" + ) + + else: + # wire in the scan parameter workflow + node, out = self.get_data( + "scan-params", "['scan-params:scan_params_ingress']" + ) + self.wf.connect(node, out, scan_params, "data_config_scan_params") + + self.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") + self.set_data( + "tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress" + ) + self.set_data( + "template", scan_params, "template", {}, "", "func_metadata_ingress" + ) + self.set_data( + "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress" + ) + self.set_data( + "stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress" + ) + self.set_data( + "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress" + ) + + if diff: + # Connect EffectiveEchoSpacing from functional metadata + self.set_data( + "effectiveEchoSpacing", + scan_params, + "effective_echo_spacing", + {}, + "", + "func_metadata_ingress", + ) + node, out_file = self.get_data( + "effectiveEchoSpacing", 
"['effectiveEchoSpacing:func_metadata_ingress']" + ) + self.wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing") + self.set_data( + "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress" + ) + self.set_data( + "ees-asym-ratio", + calc_delta_ratio, + "ees_asym_ratio", + {}, + "", + "ees_asym_ratio_ingress", + ) + + return diff, blip, fmap_rp_list + + def ingress_pipeconfig_paths(self): + """Ingress config file paths.""" + # TODO: may want to change the resource keys for each to include one level up in the YAML as well + + import pandas as pd + import pkg_resources as p + + template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") + template_df = pd.read_csv(template_csv, keep_default_na=False) + + for row in template_df.itertuples(): + key = row.Key + val = row.Pipeline_Config_Entry + val = self.cfg.get_nested(self.cfg, [x.lstrip() for x in val.split(",")]) + resolution = row.Intended_Resolution_Config_Entry + desc = row.Description + + if not val: + continue + + if resolution: + res_keys = [x.lstrip() for x in resolution.split(",")] + tag = res_keys[-1] + json_info = {} + + if "$FSLDIR" in val: + val = val.replace( + "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"] + ) + if "$priors_path" in val: + priors_path = ( + self.cfg.segmentation["tissue_segmentation"]["FSL-FAST"][ + "use_priors" + ]["priors_path"] + or "" + ) + if "$FSLDIR" in priors_path: + priors_path = priors_path.replace( + "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"] + ) + val = val.replace("$priors_path", priors_path) + if "${resolution_for_anat}" in val: + val = val.replace( + "${resolution_for_anat}", + self.cfg.registration_workflows["anatomical_registration"][ + "resolution_for_anat" + ], + ) + if "${func_resolution}" in val: + val = val.replace( + "${func_resolution}", + self.cfg.registration_workflows["functional_registration"][ + "func_registration_to_template" + ]["output_resolution"][tag], + ) + + if desc: + template_name, _template_desc = lookup_identifier(val) + if template_name: + desc = f"{template_name} - {desc}" + json_info["Description"] = f"{desc} - {val}" + if resolution: + resolution = self.cfg.get_nested(self.cfg, res_keys) + json_info["Resolution"] = resolution + + resampled_template = pe.Node( + Function( + input_names=["resolution", "template", "template_name", "tag"], + output_names=["resampled_template"], + function=resolve_resolution, + as_module=True, + ), + name="resampled_" + key, + ) + + resampled_template.inputs.resolution = resolution + resampled_template.inputs.template = val + resampled_template.inputs.template_name = key + resampled_template.inputs.tag = tag + + # the set_data below is set up a little differently, because we are + # injecting and also over-writing already-existing entries + # other alternative would have been to ingress into the + # resampled_template node from the already existing entries, but we + # didn't do that here + self.set_data( + key, + resampled_template, + "resampled_template", + json_info, + "", + "template_resample", + ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! 
because you're not connecting this the regular way, you have to do it manually + + elif val: + config_ingress = create_general_datasource(f"gather_{key}") + config_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=val, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data( + key, + config_ingress, + "outputspec.data", + json_info, + "", + f"{key}_config_ingress", + ) + + def create_func_datasource( + self, rest_dict: dict, wf_name="func_datasource" + ) -> pe.Workflow: + """Create a :py:class:`~nipype.pipeline.engine.Workflow` to gather timeseries data. + + Return the functional timeseries-related file paths for each series/scan from the + dictionary of functional files described in the data configuration (sublist) YAML + file. + + Scan input (from inputnode) is an iterable. + """ + wf = pe.Workflow(name=wf_name) + + inputnode = pe.Node( + util.IdentityInterface( + fields=["subject", "scan", "creds_path", "dl_dir"], + mandatory_inputs=True, + ), + name="inputnode", + ) + + outputnode = pe.Node( + util.IdentityInterface( + fields=[ + "subject", + "rest", + "scan", + "scan_params", + "phase_diff", + "magnitude", + ] + ), + name="outputspec", + ) + + # have this here for now because of the big change in the data + # configuration format + # (Not necessary with ingress - format does not comply) + if not self.check_rpool("derivatives-dir"): + check_scan = pe.Node( + Function( + input_names=["func_scan_dct", "scan"], + output_names=[], + function=check_func_scan, + as_module=True, + ), + name="check_func_scan", + ) + + check_scan.inputs.func_scan_dct = rest_dict + wf.connect(inputnode, "scan", check_scan, "scan") + + # get the functional scan itself + selectrest = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="selectrest", + ) + selectrest.inputs.rest_dict = rest_dict + selectrest.inputs.resource = "scan" + wf.connect(inputnode, "scan", selectrest, "scan") + + # check to see if it's on an Amazon AWS S3 bucket, and download it, if it + # is - otherwise, just return the local file path + check_s3_node = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="check_for_s3", + ) + + wf.connect(selectrest, "file_path", check_s3_node, "file_path") + wf.connect(inputnode, "creds_path", check_s3_node, "creds_path") + wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") + check_s3_node.inputs.img_type = "func" + + wf.connect(inputnode, "subject", outputnode, "subject") + wf.connect(check_s3_node, "local_path", outputnode, "rest") + wf.connect(inputnode, "scan", outputnode, "scan") + + # scan parameters CSV + select_scan_params = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="select_scan_params", + ) + select_scan_params.inputs.rest_dict = rest_dict + select_scan_params.inputs.resource = "scan_parameters" + wf.connect(inputnode, "scan", select_scan_params, "scan") + + # if the scan parameters file is on AWS S3, download it + s3_scan_params = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="s3_scan_params", + ) + + wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path") + 
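For context, the `rest_dict` consumed by `check_func_scan` and `get_rest` here is the per-session functional dictionary from the data configuration, keyed by scan name. A minimal sketch with made-up subject and paths (not part of this change):

    rest_dict = {
        "task-rest_run-1": {
            "scan": "/data/sub-0001/ses-1/func/sub-0001_task-rest_run-1_bold.nii.gz",
            "scan_parameters": "/data/sub-0001/ses-1/func/sub-0001_task-rest_run-1_bold.json",
        }
    }
    # get_rest(scan, rest_dict, resource) is expected to return, e.g.,
    # rest_dict["task-rest_run-1"]["scan"] when resource="scan".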
wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") + wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") + wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") + + return wf + + def ingress_raw_func_data(self): + """Ingress raw functional data.""" + func_paths_dct = self.data_paths.func + + func_wf = self.create_func_datasource( + func_paths_dct, f"func_ingress_{self.part_id}_{self.ses_id}" + ) + func_wf.inputs.inputnode.set( + subject=self.part_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) + + self.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") + self.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") + self.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") + self.set_data( + "scan-params", + func_wf, + "outputspec.scan_params", + {}, + "", + "scan_params_ingress", + ) + + # TODO: CHECK FOR PARAMETERS + + diff, blip, fmap_rp_list = self.ingress_func_metadata() + + # Memoize list of local functional scans + # TODO: handle S3 files + # Skip S3 files for now + + local_func_scans = [ + func_paths_dct[scan]["scan"] + for scan in func_paths_dct.keys() + if not func_paths_dct[scan]["scan"].startswith("s3://") + ] + if local_func_scans: + # pylint: disable=protected-access + self.wf._local_func_scans = local_func_scans + if self.cfg.pipeline_setup["Debugging"]["verbose"]: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("local_func_scans: %s", local_func_scans) + del local_func_scans + + return diff, blip, fmap_rp_list + + def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> None: + """Ingress a functional output directory.""" + pipe_x = len(self.pipe_list) + ingress = self.create_func_datasource( + func_dict, f"gather_func_outdir_{key}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + subject=self.unique_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") + ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) + self.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") + + self.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") + self.set_data( + "scan-params", + ingress, + "outputspec.scan_params", + {}, + "", + "scan_params_ingress", + ) + self.ingress_func_metadata() + + # Have to do it this weird way to save the parsed BIDS tag & filepath + mask_paths_key = ( + "desc-bold_mask" + if "desc-bold_mask" in func_paths + else "space-template_desc-bold_mask" + ) + ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" + + # Connect func data with appropriate scan name + iterables = pe.Node( + Function( + input_names=["scan", "mask_paths", "ts_paths"], + output_names=["out_scan", "mask", "confounds"], + function=set_iterables, + ), + name=f"set_iterables_{pipe_x}", + ) + iterables.inputs.mask_paths = func_paths[mask_paths_key] + iterables.inputs.ts_paths = func_paths[ts_paths_key] + self.wf.connect(ingress, "outputspec.scan", iterables, "scan") + + for key in func_paths: + if key in (mask_paths_key, ts_paths_key): + ingress_func = create_general_datasource(f"ingress_func_data_{key}") + ingress_func.inputs.inputnode.set( + unique_id=self.unique_id, + creds_path=self.creds_path, +
dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") + if key == mask_paths_key: + self.wf.connect(iterables, "mask", ingress_func, "inputnode.data") + self.set_data( + key, + ingress_func, + "inputnode.data", + {}, + "", + f"outdir_{key}_ingress", + ) + elif key == ts_paths_key: + self.wf.connect( + iterables, "confounds", ingress_func, "inputnode.data" + ) + self.set_data( + key, + ingress_func, + "inputnode.data", + {}, + "", + f"outdir_{key}_ingress", + ) + + def json_outdir_ingress( + self, filepath: Path | str, data_label: str, json: dict + ) -> tuple[dict, tuple[str, str], str, str]: + """Ingress sidecars from a BIDS derivatives directory.""" + desc_val = None + for tag in data_label.split("_"): + if "desc-" in tag: + desc_val = tag + break + jsonpath = str(filepath) + for ext in EXTS: + jsonpath = jsonpath.replace(ext, "") + jsonpath = f"{jsonpath}.json" + + if not os.path.exists(jsonpath): + WFLOGGER.info( + "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", + filepath, + jsonpath, + ) + json_info = { + "Description": "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." + } + json_info = {**json_info, **json} + write_output_json(json_info, jsonpath) + else: + json_info = read_json(jsonpath) + json_info = {**json_info, **json} + if "CpacProvenance" in json_info: + if desc_val: + # it's a C-PAC output, let's check for pipe_idx/strat integer + # suffixes in the desc- entries. + only_desc = str(desc_val) + + if only_desc[-1].isdigit(): + for _strat_idx in range(0, 3): + # let's stop at 3, please don't run >999 strategies okay? + if only_desc[-1].isdigit(): + only_desc = only_desc[:-1] + + if only_desc[-1] == "-": + only_desc = only_desc.rstrip("-") + else: + msg = ( + "\n[!] Something went wrong with either " + "reading in the output directory or when " + "it was written out previously.\n\nGive " + "this to your friendly local C-PAC " + f"developer:\n\n{data_label!s}\n" + ) + raise IOError(msg) + + # remove the integer at the end of the desc-* variant, we will + # get the unique pipe_idx from the CpacProvenance below + data_label = data_label.replace(desc_val, only_desc) + + # preserve cpac provenance/pipe_idx + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = "" + else: + json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] # type: ignore [assignment] + if "Description" not in json_info: + json_info["Description"] = ( + "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." 
+ ) + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = f"{data_label}_ingress" + + return json_info, pipe_idx, node_name, data_label + + def ingress_raw_anat_data(self) -> None: + """Ingress raw anatomical data.""" + if not self.data_paths.anat: + WFLOGGER.warning("No anatomical data present.") + return + + anat_flow = create_anat_datasource(f"anat_T1w_gather_{self.unique_id}") + + anat = {} + if "T1w" in self.data_paths.anat: + anat["T1"] = self.data_paths.anat["T1w"] + + if "T1" in anat: + anat_flow.inputs.inputnode.set( + subject=self.part_id, + anat=anat["T1"], + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + img_type="anat", + ) + self.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") + + if "T2w" in self.data_paths.anat: + anat_flow_T2 = create_anat_datasource( + f"anat_T2w_gather_{self.part_id}_{self.ses_id}" + ) + anat_flow_T2.inputs.inputnode.set( + subject=self.part_id, + anat=self.data_paths.anat["T2w"], + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + img_type="anat", + ) + self.set_data( + "T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress" + ) + + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] + self.ingress_freesurfer() + + def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # noqa: PLR0912,PLR0915 + """Connect a :py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlock` via the `ResourcePool`.""" + debug = bool(self.cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] + all_opts: list[str] = [] + + sidecar_additions = { + "CpacConfigHash": hashlib.sha1( + json.dumps(self.cfg.dict(), sort_keys=True).encode("utf-8") + ).hexdigest(), + "CpacConfig": self.cfg.dict(), + } + + if self.cfg["pipeline_setup"]["output_directory"].get("user_defined"): + sidecar_additions["UserDefined"] = self.cfg["pipeline_setup"][ + "output_directory" + ]["user_defined"] + + for name, block_dct in block.node_blocks.items(): + # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. + switch = _check_null(block_dct["switch"]) + config = _check_null(block_dct["config"]) + option_key = _check_null(block_dct["option_key"]) + option_val = _check_null(block_dct["option_val"]) + inputs: NODEBLOCK_INPUTS = _check_null(block_dct["inputs"]) + outputs: NODEBLOCK_OUTPUTS = _check_null(block_dct["outputs"]) + + block_function: NodeBlockFunction = block_dct["block_function"] + + opts = [] + if option_key and option_val: + if not isinstance(option_key, list): + option_key = [option_key] + if not isinstance(option_val, list): + option_val = [option_val] + if config: + key_list = config + option_key + else: + key_list = option_key + if "USER-DEFINED" in option_val: + # load custom config data into each 'opt' + opts = self.cfg[key_list] + else: + for option in option_val: + try: + if option in self.cfg[key_list]: + # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list + opts.append(option) + except AttributeError as err: + msg = f"{err}\nNode Block: {name}" + raise Exception(msg) + + if opts is None: + opts = [opts] + + elif option_key and not option_val: + # enables multiple config forking entries + if not isinstance(option_key[0], list): + msg = ( + f"[!] 
The option_key field ({option_key}) " + f"for {name} exists but there is no " + "option_val.\n\nIf you are trying to " + "populate multiple option keys, the " + "option_val field must contain a list of " + "a list.\n" + ) + raise ValueError(msg) + for option_config in option_key: + # option_config is a list of pipe config levels down to the option + if config: + key_list = config + option_config + else: + key_list = option_config + option_val = option_config[-1] + if option_val in self.cfg[key_list[:-1]]: + opts.append(option_val) + else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! + opts = [None] + # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples + if not opts: + # for node blocks where the options are split into different + # block functions - opts will be empty for non-selected + # options, and would waste the get_strats effort below + continue + all_opts += opts + + if not switch: + switch = [True] + else: + if config: + try: + key_list = config + switch + except TypeError as te: + msg = ( + "\n\n[!] Developer info: Docstring error " + f"for {name}, make sure the 'config' or " + "'switch' fields are lists.\n\n" + ) + raise TypeError(msg) from te + switch = self.cfg[key_list] + elif isinstance(switch[0], list): + # we have multiple switches, which is designed to only work if + # config is set to "None" + switch_list = [] + for key_list in switch: + val = self.cfg[key_list] + if isinstance(val, list): + # fork switches + if True in val: + switch_list.append(True) + if False in val: + switch_list.append(False) + else: + switch_list.append(val) + if False in switch_list: + switch = [False] + else: + switch = [True] + else: + # if config is set to "None" + key_list = switch + switch = self.cfg[key_list] + if not isinstance(switch, list): + switch = [switch] + if True in switch: + for ( + pipe_idx, + strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} + ) in self.get_strats(inputs, debug).items(): + # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } + fork = False in switch + for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. + # remember, you can get 'data' or 'json' from strat_pool with member functions + # strat_pool has all of the JSON information of all the inputs! + # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. + # particularly, our custom 'CpacProvenance' field. 
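Concretely, each `strat_pool` yielded by `get_strats` is a one-level, single-strategy view of the resource pool, and node-block code reads it through the `StratPool` accessors. A minimal sketch, assuming `rpool` is an already-initialized `ResourcePool` and using illustrative resource keys:

    # Sketch only: the inputs list and resource keys are illustrative.
    inputs = ["desc-preproc_T1w", "desc-brain_mask"]
    for pipe_idx, strat_pool in rpool.get_strats(inputs, False).items():
        node, out = strat_pool.get_data("desc-preproc_T1w")  # (node, output name) for this strategy
        sidecar = strat_pool.get_json("desc-preproc_T1w")  # that resource's JSON, including CpacProvenance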
+ node_name = name + pipe_x = self._get_pipe_number(pipe_idx) + + replaced_inputs = [] + for interface in block.input_interface: + if isinstance(interface[1], list): + for input_name in interface[1]: + if strat_pool.check_rpool(input_name): + break + else: + input_name = interface[1] + strat_pool.copy_resource(input_name, interface[0]) + replaced_inputs.append(interface[0]) + try: + wf, outs = block_function( + wf, self.cfg, strat_pool, pipe_x, opt + ) + except IOError as e: # duplicate node + WFLOGGER.warning(e) + continue + + if not outs: + if block_function.__name__ == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) + LOGTAIL["warnings"].append( + WARNING_FREESURFER_OFF_WITH_DATA + ) + continue + + if opt and len(option_val) > 1: + node_name = f"{node_name}_{opt}" + elif opt and "USER-DEFINED" in option_val: + node_name = f'{node_name}_{opt["Name"]}' + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("\n=======================") + verbose_logger.debug("Node name: %s", node_name) + prov_dct = self.get_resource_strats_from_prov( + ast.literal_eval(str(pipe_idx)) + ) + for key, val in prov_dct.items(): + verbose_logger.debug("-------------------") + verbose_logger.debug("Input - %s:", key) + sub_prov_dct = self.get_resource_strats_from_prov(val) + for sub_key, sub_val in sub_prov_dct.items(): + sub_sub_dct = self.get_resource_strats_from_prov( + sub_val + ) + verbose_logger.debug(" sub-input - %s:", sub_key) + verbose_logger.debug(" prov = %s", sub_val) + verbose_logger.debug( + " sub_sub_inputs = %s", sub_sub_dct.keys() + ) + + for label, connection in outs.items(): + block.check_output(outputs, label, name) + new_json_info = strat_pool.json + + # transfer over data-specific json info + # for example, if the input data json is _bold and the output is also _bold + data_type = label.split("_")[-1] + if data_type in new_json_info["subjson"]: + if ( + "SkullStripped" + in new_json_info["subjson"][data_type] + ): + new_json_info["SkullStripped"] = new_json_info[ + "subjson" + ][data_type]["SkullStripped"] + + # determine sources for the outputs, i.e. 
all input data into the node block + new_json_info["Sources"] = [ + x + for x in strat_pool.rpool + if x != "json" and x not in replaced_inputs + ] + + if isinstance(outputs, dict): + new_json_info.update(outputs[label]) + if "Description" not in outputs[label]: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + if "Template" in outputs[label]: + template_key = outputs[label]["Template"] + if template_key in new_json_info["Sources"]: + # only if the pipeline config template key is entered as the 'Template' field + # otherwise, skip this and take in the literal 'Template' string + try: + new_json_info["Template"] = new_json_info[ + "subjson" + ][template_key]["Description"] + except KeyError: + pass + try: + new_json_info["Resolution"] = new_json_info[ + "subjson" + ][template_key]["Resolution"] + except KeyError: + pass + else: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + + if "Description" in new_json_info: + new_json_info["Description"] = " ".join( + new_json_info["Description"].split() + ) + + for sidecar_key, sidecar_value in sidecar_additions.items(): + if sidecar_key not in new_json_info: + new_json_info[sidecar_key] = sidecar_value + + try: + del new_json_info["subjson"] + except KeyError: + pass + + if fork or len(opts) > 1 or len(all_opts) > 1: + if "CpacVariant" not in new_json_info: + new_json_info["CpacVariant"] = {} + raw_label = self.get_raw_label(label) + if raw_label not in new_json_info["CpacVariant"]: + new_json_info["CpacVariant"][raw_label] = [] + new_json_info["CpacVariant"][raw_label].append( + node_name + ) + + self.set_data( + label, + connection[0], + connection[1], + new_json_info, + pipe_idx, + node_name, + fork, + ) + + wf, post_labels = self.post_process( + wf, + label, + connection, + new_json_info, + pipe_idx, + pipe_x, + outs, + ) + + if self.func_reg: + for postlabel in post_labels: + connection = ResourceData( # noqa: PLW2901 + postlabel[1], postlabel[2] + ) + wf = self.derivative_xfm( + wf, + postlabel[0], + connection, + new_json_info, + pipe_idx, + pipe_x, + ) + return wf + + def connect_pipeline( + self, + wf: pe.Workflow, + cfg: Configuration, + pipeline_blocks: PIPELINE_BLOCKS, + ) -> pe.Workflow: + """Connect the pipeline blocks to the workflow.""" + from CPAC.pipeline.engine.nodeblock import NodeBlockFunction, PIPELINE_BLOCKS + + WFLOGGER.info( + "Connecting pipeline blocks:\n%s", + NodeBlock.list_blocks(pipeline_blocks, indent=1), + ) + previous_nb: Optional[NodeBlockFunction | PIPELINE_BLOCKS] = None + for block in pipeline_blocks: + try: + wf = self.connect_block( + wf, + NodeBlock( + block, debug=cfg["pipeline_setup", "Debugging", "verbose"] + ), + ) + except LookupError as e: + if getattr(block, "name", "") == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) + LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA) + continue + previous_nb_str = ( + (f"after node block '{previous_nb.name}':") + if isinstance(previous_nb, NodeBlockFunction) + else "at beginning:" + ) + # Alert user to block that raises error + if isinstance(block, list): + node_block_names = str([NodeBlock(b).name for b in block]) + e.args = ( + f"When trying to connect one of the node blocks " + f"{node_block_names} " + f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", + ) + else: + node_block_names = NodeBlock(block).name + e.args = ( + f"When trying to connect node block " + f"'{node_block_names}' " + f"to workflow '{wf}' 
{previous_nb_str} {e.args[0]}", + ) + if cfg.pipeline_setup["Debugging"]["verbose"]: # type: ignore [attr-defined] + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug(e.args[0]) + verbose_logger.debug(self) + raise + previous_nb = block + + return wf + + def derivative_xfm( + self, + wf: pe.Workflow, + label: str, + connection: ResourceData | tuple[pe.Node | pe.Workflow, str], + json_info: dict, + pipe_idx: str | tuple, + pipe_x: int, + ) -> pe.Workflow: + """Find the appropriate bold-to-template transform for given `pipe_idx`.""" + if label in self.xfm: + json_info = dict(json_info) + + # get the bold-to-template transform from the current strat_pool info + xfm_idx: Optional[str | tuple] = None + xfm_label = "from-bold_to-template_mode-image_xfm" + for entry in json_info["CpacProvenance"]: + if isinstance(entry, list): + if entry[-1].split(":")[0] == xfm_label: + xfm_prov = entry + xfm_idx = self.generate_prov_string(xfm_prov)[1] + break + + # but if the resource doesn't have the bold-to-template transform + # in its provenance/strategy, find the appropriate one for this + # current pipe_idx/strat + xfm_info: list[tuple[str | tuple, list]] + if not xfm_idx: + xfm_info = [] + for pipe_idx, entry in self.get(xfm_label).items(): + xfm_info.append((pipe_idx, entry.cpac_provenance)) + else: + xfm_info = [(xfm_idx, xfm_prov)] + + for num, xfm_entry in enumerate(xfm_info): + xfm_idx, xfm_prov = xfm_entry + reg_tool = check_prov_for_regtool(xfm_prov) + + xfm = transform_derivative( + f"{label}_xfm_{pipe_x}_{num}", + label, + reg_tool, + self.num_cpus, + self.num_ants_cores, + ants_interp=self.ants_interp, + fsl_interp=self.fsl_interp, + opt=None, + ) + wf.connect(connection[0], connection[1], xfm, "inputspec.in_file") + + node, out = self.get_data("T1w-brain-template-deriv", quick_single=True) + wf.connect(node, out, xfm, "inputspec.reference") + + node, out = self.get_data( + "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx + ) + wf.connect(node, out, xfm, "inputspec.transform") + + label = f"space-template_{label}" + json_info["Template"] = self.get_json_info( + "T1w-brain-template-deriv", "Description" + ) + new_prov = json_info["CpacProvenance"] + xfm_prov + json_info["CpacProvenance"] = new_prov + new_pipe_idx = self.generate_prov_string(new_prov) + self.set_data( + label, + xfm, + "outputspec.out_file", + json_info, + new_pipe_idx, + f"{label}_xfm_{num}", + fork=True, + ) + + return wf + + def post_process( + self, + wf: pe.Workflow, + label: str, + connection: ResourceData | tuple[pe.Node | pe.Workflow, str], + json_info: dict, + pipe_idx: str | tuple, + pipe_x: int, + outs: dict[str, ResourceData], + ) -> tuple[pe.Workflow, list[tuple[str, pe.Node | pe.Workflow, str]]]: + """Connect smoothing and z-scoring, if configured.""" + input_type = "func_derivative" + + post_labels = [(label, connection[0], connection[1])] + + if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label): + # suffix: [eigenvector or degree] centrality [binarized or weighted] + # or lfcd [binarized or weighted] + mask = "template-specification-file" + elif "space-template" in label: + if "space-template_res-derivative_desc-bold_mask" in self.keys(): + mask = "space-template_res-derivative_desc-bold_mask" + else: + mask = "space-template_desc-bold_mask" + else: + mask = "space-bold_desc-brain_mask" + + mask_idx = None + for entry in json_info["CpacProvenance"]: + if isinstance(entry, list): + if entry[-1].split(":")[0] == mask: + mask_prov = entry + mask_idx = 
self.generate_prov_string(mask_prov)[1] + break + + if self.smoothing_bool: + if label in Outputs.to_smooth: + for smooth_opt in self.smooth_opts: + sm = spatial_smoothing( + f"{label}_smooth_{smooth_opt}_{pipe_x}", + self.fwhm, + input_type, + smooth_opt, + ) + wf.connect(connection[0], connection[1], sm, "inputspec.in_file") + node, out = self.get_data( + mask, pipe_idx=mask_idx, quick_single=mask_idx is None + ) + wf.connect(node, out, sm, "inputspec.mask") + + if "desc-" not in label: + if "space-" in label: + for tag in label.split("_"): + if "space-" in tag: + smlabel = label.replace(tag, f"{tag}_desc-sm") + break + else: + smlabel = f"desc-sm_{label}" + else: + for tag in label.split("_"): + if "desc-" in tag: + newtag = f"{tag}-sm" + smlabel = label.replace(tag, newtag) + break + + post_labels.append((smlabel, sm, "outputspec.out_file")) + + self.set_data( + smlabel, + sm, + "outputspec.out_file", + json_info, + pipe_idx, + f"spatial_smoothing_{smooth_opt}", + fork=True, + ) + self.set_data( + "fwhm", + sm, + "outputspec.fwhm", + json_info, + pipe_idx, + f"spatial_smoothing_{smooth_opt}", + fork=True, + ) + + if self.zscoring_bool: + for label_con_tpl in post_labels: + label = label_con_tpl[0] + connection = (label_con_tpl[1], label_con_tpl[2]) + if label in Outputs.to_zstd: + zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type) + + wf.connect(connection[0], connection[1], zstd, "inputspec.in_file") + + node, out = self.get_data(mask, pipe_idx=mask_idx) + wf.connect(node, out, zstd, "inputspec.mask") + + if "desc-" not in label: + if "space-template" in label: + new_label = label.replace( + "space-template", "space-template_desc-zstd" + ) + else: + new_label = f"desc-zstd_{label}" + else: + for tag in label.split("_"): + if "desc-" in tag: + newtag = f"{tag}-zstd" + new_label = label.replace(tag, newtag) + break + + post_labels.append((new_label, zstd, "outputspec.out_file")) + + self.set_data( + new_label, + zstd, + "outputspec.out_file", + json_info, + pipe_idx, + "zscore_standardize", + fork=True, + ) + + elif label in Outputs.to_fisherz: + zstd = fisher_z_score_standardize( + f"{label}_zstd_{pipe_x}", label, input_type + ) + + wf.connect( + connection[0], connection[1], zstd, "inputspec.correlation_file" + ) + + # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries' + oned = label.replace("correlations", "timeseries") + + node, out = outs[oned] + wf.connect(node, out, zstd, "inputspec.timeseries_oned") + + post_labels.append((new_label, zstd, "outputspec.out_file")) + + self.set_data( + new_label, + zstd, + "outputspec.out_file", + json_info, + pipe_idx, + "fisher_zscore_standardize", + fork=True, + ) + + return wf, post_labels + + @staticmethod + def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: + """Return all entries that led to this provenance. 
+ + If you provide the provenance of a `ResourcePool` output, this will + return a dictionary of all the preceding `ResourcePool` entries that + led to that one specific output:: + {rpool entry}: {that entry's provenance} + {rpool entry}: {that entry's provenance} + """ + strat_resource: dict[str, list | str] = {} + if isinstance(prov, str): + resource = prov.split(":")[0] + strat_resource[resource] = prov + else: + for entry in prov: + if isinstance(entry, list): + resource = entry[-1].split(":")[0] + strat_resource[resource] = entry + elif isinstance(entry, str): + resource = entry.split(":")[0] + strat_resource[resource] = entry + return strat_resource + + def _config_lookup( + self, keylist: str | list[str], fallback_type: type = NoneType + ) -> Any: + """Lookup a :py:class:`~CPAC.utils.configuration.Configuration` key, return ``None`` if not found.""" + try: + return self.cfg[keylist] + except (AttributeError, KeyError): + return fallback_type() + + def _get_pipe_number(self, pipe_idx: str | tuple) -> int: + """Return the index of a strategy in `self.pipe_list`.""" + return self.pipe_list.index(pipe_idx) + + def _get_unlabelled(self, resource: str) -> set[str]: + """Get unlabelled :py:class:`Resource` s. + + These :py:class:`Resource` s need integer suffixes to differentiate. + """ + from CPAC.func_preproc.func_motion import motion_estimate_filter + + all_jsons = [ + self.rpool[resource][pipe_idx]._json for pipe_idx in self.rpool[resource] + ] + unlabelled = { + key + for json_info in all_jsons + for key in json_info.get("CpacVariant", {}).keys() + if key not in (*motion_estimate_filter.outputs, "regressors") + } + if "bold" in unlabelled: + all_bolds = list( + chain.from_iterable( + json_info["CpacVariant"]["bold"] + for json_info in all_jsons + if "CpacVariant" in json_info and "bold" in json_info["CpacVariant"] + ) + ) + if all( + re.match(r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold) + for _bold in all_bolds + ): + # this fork point should only result in 0 or 1 forks + unlabelled.remove("bold") + del all_bolds + all_forks = { + key: set( + chain.from_iterable( + json_info["CpacVariant"][key] + for json_info in all_jsons + if "CpacVariant" in json_info and key in json_info["CpacVariant"] + ) + ) + for key in unlabelled + } + del all_jsons + for key, forks in all_forks.items(): + if len(forks) < 2: # noqa: PLR2004 + # no int suffix needed if only one fork + unlabelled.remove(key) + del all_forks + return unlabelled + + +class StratPool(_Pool): + """A pool of :py:class:`ResourcePool` s keyed by strategy.""" + + def __init__( + self, + cfg: Configuration, + *, + rpool: Optional[dict] = None, + name: str | list[str] = "", + ) -> None: + """Initialize a `StratPool`.""" + super().__init__() + if not rpool: + self.rpool = STRAT_DICT({}) + else: + self.rpool = STRAT_DICT(rpool) + self._json: dict[str, dict] = {"subjson": {}} + self.cfg = cfg + if not isinstance(name, list): + name = [name] + self.name: list[str] = name + self._regressor_dct: dict = {} + + def append_name(self, name: str) -> None: + """Append a name to the `StratPool`.""" + self.name.append(name) + + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: Literal[False] = False, + *, + optional: Literal[True], + ) -> Optional[Resource]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX], + report_fetched: Literal[True], + optional: Literal[True], + ) -> tuple[Optional[Resource], Optional[str]]: ... 
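A worked example of the `get_resource_strats_from_prov` helper above may make the mapping clearer; the resource and node-block names are hypothetical, and calling it on `ResourcePool` assumes that is the enclosing class:

    prov = [
        ["T1w:anat_ingress", "desc-preproc_T1w:anatomical_init"],
        "desc-brain_mask:brain_mask_ingress",
    ]
    ResourcePool.get_resource_strats_from_prov(prov)
    # -> {"desc-preproc_T1w": ["T1w:anat_ingress", "desc-preproc_T1w:anatomical_init"],
    #     "desc-brain_mask": "desc-brain_mask:brain_mask_ingress"}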
+ @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + *, + report_fetched: Literal[True], + optional: Literal[False], + ) -> tuple[Resource, str]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + *, + optional: Literal[True], + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> Resource: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + *, + report_fetched: Literal[True], + optional: Literal[False] = False, + ) -> tuple[Resource, str]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ): + """Return a :py:class:`Resource` .""" + return super().get(resource, pipe_idx, report_fetched, optional) + + @overload + def get_data( + self, resource: list[str] | str, report_fetched: Literal[True] + ) -> tuple[ResourceData, str]: ... + @overload + def get_data( + self, resource: list[str] | str, report_fetched: Literal[False] = False + ) -> ResourceData: ... + def get_data(self, resource, report_fetched=False): + """Get :py:class:`ResourceData` from a `StratPool`.""" + _resource = self.get(resource, report_fetched=report_fetched) + if report_fetched: + assert isinstance(_resource, tuple) + connect, fetched = _resource + assert isinstance(connect, Resource) and isinstance(fetched, str) + return connect.data, fetched + assert isinstance(_resource, Resource) + return _resource.data + + def get_json(self, resource: str) -> dict: + """Get JSON metadata from a :py:class:`Resource` in a `StratPool`.""" + return self.get(resource).json + + json = property( + fget=Resource.get_json, + fset=Resource.set_json, + doc="""Return a deep copy of full-`StratPool`-strategy-specific JSON.""", + ) + + def get_cpac_provenance(self, resource: list[str] | str) -> list: + """Get "CpacProvenance" for a given :py:class:`Resource` .""" + # NOTE: strat_resource has to be entered properly by the developer + # it has to either be rpool[resource][strat] or strat_pool[resource] + if isinstance(resource, list): + for _resource in resource: + try: + return self.get_cpac_provenance(_resource) + except KeyError: + continue + return self.get(resource).cpac_provenance + + def copy_resource(self, resource: str, new_name: str): + """Copy a :py:class:`Resource` within a `StratPool`.""" + try: + self.rpool[new_name] = self.rpool[resource] + except KeyError: + msg = f"[!] {resource} not in the resource pool." + raise Exception(msg) + + def filter_name(self, cfg: Configuration) -> str: + """ + Return the name of the filter for this strategy. + + In a `StratPool` with filtered movement parameters. 
+ """ + motion_filters = cfg[ + "functional_preproc", + "motion_estimates_and_correction", + "motion_estimate_filter", + "filters", + ] + if len(motion_filters) == 1 and cfg.switch_is_on( + [ + "functional_preproc", + "motion_estimates_and_correction", + "motion_estimate_filter", + "run", + ], + exclusive=True, + ): + return motion_filters[0]["Name"] + try: + key = "motion" + sidecar = self.get_json("desc-movementParameters_motion") + except KeyError: + sidecar = None + if sidecar is not None and "CpacVariant" in sidecar: + if sidecar["CpacVariant"][key]: + return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1] + return "none" + + def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: + """Preserve JSON info when updating a `StratPool`.""" + data_type = resource.split("_")[-1] + if data_type not in self._json["subjson"]: + self._json["subjson"][data_type] = {} + self._json["subjson"][data_type].update(strat_resource.json) + + @property + def regressor_dct(self) -> dict: + """Return the regressor dictionary for the current strategy if one exists. + + Raises + ------ + KeyError + If regressor dictionary does not exist in current strategy. + """ + # pylint: disable=attribute-defined-outside-init + if hasattr(self, "_regressor_dct") and self._regressor_dct: # memoized + # pylint: disable=access-member-before-definition + return self._regressor_dct + key_error = KeyError( + "[!] No regressors in resource pool. \n\n" + "Try turning on create_regressors or " + "ingress_regressors." + ) + _nr = self.cfg["nuisance_corrections", "2-nuisance_regression"] + if not hasattr(self, "timeseries"): + if _nr["Regressors"]: + self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]} + else: + self.regressors = [] + if self.check_rpool("parsed_regressors"): # ingressed regressor + # name regressor workflow without regressor_prov + strat_name = _nr["ingress_regressors"]["Regressors"]["Name"] + if strat_name in self.regressors: + self._regressor_dct = self.regressors[strat_name] + return self._regressor_dct + self._regressor_dct = _nr["ingress_regressors"]["Regressors"] + return self._regressor_dct + prov = self.get_cpac_provenance("desc-confounds_timeseries") + strat_name_components = prov[-1].split("_") + for _ in list(range(prov[-1].count("_"))): + reg_name = "_".join(strat_name_components[-_:]) + if isinstance(self.regressors, dict) and reg_name in self.regressors: + self._regressor_dct = self.regressors[reg_name] + return self._regressor_dct + raise key_error + + @property + def filtered_movement(self) -> bool: + """Check if the movement parameters have been filtered in this `StratPool`.""" + try: + return "motion_estimate_filter" in str( + self.get_cpac_provenance("desc-movementParameters_motion") + ) + except KeyError: + # not a strat_pool or no movement parameters in strat_pool + return False + + +def _check_null(val: Any) -> Any: + """Return ``None`` if `val` == "none" (case-insensitive).""" + if isinstance(val, str): + val = None if val.lower() == "none" else val + return val diff --git a/CPAC/pipeline/nodeblock.py b/CPAC/pipeline/nodeblock.py deleted file mode 100644 index 53b9db1330..0000000000 --- a/CPAC/pipeline/nodeblock.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (C) 2023-2024 C-PAC Developers - -# This file is part of C-PAC. 
- -# C-PAC is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. - -# C-PAC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with C-PAC. If not, see . -"""Class and decorator for NodeBlock functions.""" - -from typing import Any, Callable, Optional - - -class NodeBlockFunction: - """Store a reference to the nodeblock function and all of its meta-data.""" - - def __init__( - self, - func: Callable, - name: Optional[str] = None, - config: Optional[list[str]] = None, - switch: Optional[list[str] | list[list[str]]] = None, - option_key: Optional[str | list[str]] = None, - option_val: Optional[str | list[str]] = None, - inputs: Optional[list[str | list | tuple]] = None, - outputs: Optional[list[str] | dict[str, Any]] = None, - ) -> None: - self.func = func - """Nodeblock function reference.""" - self.name: Optional[str] = name - """Used in the graph and logging to identify the NodeBlock and its component nodes.""" - self.config: Optional[list[str]] = config - """ - Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this - function. If config is set to ``None``, then all other configuration-related entities must be specified from the - root of the configuration. - """ - self.switch: Optional[list[str] | list[list[str]]] = switch - """ - Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings - indicates multiple switches that must all be True to run, and is currently only an option if config is set to - ``None``. - """ - self.option_key: Optional[str | list[str]] = option_key - """ - Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. - """ - self.option_val: Optional[str | list[str]] = option_val - """Indicates values for which this NodeBlock should be active.""" - self.inputs: Optional[list[str | list | tuple]] = inputs - """ResourcePool keys indicating resources needed for the NodeBlock's functionality.""" - self.outputs: Optional[list[str] | dict[str, Any]] = outputs - """ - ResourcePool keys indicating resources generated or updated by the NodeBlock, optionally including metadata - for the outputs' respective sidecars. 
- """ - - # Forward function attributes similar to functools.update_wrapper: - # https://docs.python.org/3/library/functools.html#functools.update_wrapper - self.__module__ = func.__module__ - self.__name__ = func.__name__ - self.__qualname__ = func.__qualname__ - self.__annotations__ = func.__annotations__ - self.__doc__ = "".join( - [ - _.replace(" ", "") - for _ in [func.__doc__, "", "", NodeBlockFunction.__call__.__doc__] - if _ is not None - ] - ).rstrip() - - # all node block functions have this signature - def __call__(self, wf, cfg, strat_pool, pipe_num, opt=None): - """ - - Parameters - ---------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - cfg : ~CPAC.utils.configuration.Configuration - - strat_pool - - pipe_num : int - - opt : str, optional - - Returns - ------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - out : dict - """ - return self.func(wf, cfg, strat_pool, pipe_num, opt) - - def legacy_nodeblock_dict(self): - """Return nodeblock metadata as a dictionary. - - Helper for compatibility reasons. - """ - return { - "name": self.name, - "config": self.config, - "switch": self.switch, - "option_key": self.option_key, - "option_val": self.option_val, - "inputs": self.inputs, - "outputs": self.outputs, - } - - def __repr__(self) -> str: - """Return reproducible string representation of a NodeBlockFunction.""" - return ( - f"NodeBlockFunction({self.func.__module__}." - f'{self.func.__name__}, "{self.name}", ' - f"config={self.config}, switch={self.switch}, " - f"option_key={self.option_key}, option_val=" - f"{self.option_val}, inputs={self.inputs}, " - f"outputs={self.outputs})" - ) - - def __str__(self) -> str: - """Return string representation of a NodeBlockFunction.""" - return f"NodeBlockFunction({self.name})" - - -def nodeblock( - name: Optional[str] = None, - config: Optional[list[str]] = None, - switch: Optional[list[str] | list[list[str]]] = None, - option_key: Optional[str | list[str]] = None, - option_val: Optional[str | list[str]] = None, - inputs: Optional[list[str | list | tuple]] = None, - outputs: Optional[list[str] | dict[str, Any]] = None, -): - """ - Define a node block. - - Connections to the pipeline configuration and to other node blocks. - - Parameters - ---------- - name - Used in the graph and logging to identify the NodeBlock and its component nodes. - config - Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this - function. If config is set to ``None``, then all other configuration-related entities must be specified from the - root of the configuration. - switch - Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings - indicates multiple switches that must all be True to run, and is currently only an option if config is set to - ``None``. - option_key - Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. - option_val - Indicates values for which this NodeBlock should be active. - inputs - ResourcePool keys indicating files needed for the NodeBlock's functionality. - outputs - ResourcePool keys indicating files generated or updated by the NodeBlock, optionally including metadata - for the outputs' respective sidecars. 
- """ - return lambda func: NodeBlockFunction( - func, - name if name is not None else func.__name__, - config, - switch, - option_key, - option_val, - inputs, - outputs, - ) diff --git a/CPAC/pipeline/schema.py b/CPAC/pipeline/schema.py index 915cb47045..6dc11326d5 100644 --- a/CPAC/pipeline/schema.py +++ b/CPAC/pipeline/schema.py @@ -21,6 +21,7 @@ from itertools import chain, permutations import re from subprocess import CalledProcessError +from typing import Any as TypeAny, Optional as TypeOptional import numpy as np from pathvalidate import sanitize_filename @@ -63,18 +64,12 @@ Number = Any(float, int, All(str, Match(SCIENTIFIC_NOTATION_STR_REGEX))) -def str_to_bool1_1(x): # pylint: disable=invalid-name - """Convert strings to Booleans for YAML1.1 syntax. +def str_to_bool1_1(x: TypeAny) -> bool: # pylint: disable=invalid-name + """Convert strings to Booleans for YAML1.1 syntax [1]_. - Ref https://yaml.org/type/bool.html - - Parameters + References ---------- - x : any - - Returns - ------- - bool + .. [1] 2005-01-18. Oren Ben-Kiki, Clark Evans & Brian Ingerson. `"Boolean Language-Independent Type for YAML™ Version 1.1" [Working Draft] `_. Copyright © 2001-2005 Oren Ben-Kiki, Clark Evans, Brian Ingerson. """ if isinstance(x, str): try: @@ -316,19 +311,9 @@ def str_to_bool1_1(x): # pylint: disable=invalid-name ) -def name_motion_filter(mfilter, mfilters=None): +def name_motion_filter(mfilter: dict, mfilters: TypeOptional[list] = None) -> str: """Given a motion filter, create a short string for the filename. - Parameters - ---------- - mfilter : dict - - mfliters : list or None - - Returns - ------- - str - Examples -------- >>> name_motion_filter({'filter_type': 'notch', 'filter_order': 2, @@ -385,19 +370,8 @@ def name_motion_filter(mfilter, mfilters=None): return name -def permutation_message(key, options): - """Give a human-readable error message for keys that accept permutation values. - - Parameters - ---------- - key: str - - options: list or set - - Returns - ------- - msg: str - """ +def permutation_message(key: str, options: list | set) -> str: + """Give a human-readable error message for keys that accept permutation values.""" return f""" \'{key}\' takes a dictionary with paths to region-of-interest (ROI) @@ -412,7 +386,7 @@ def permutation_message(key, options): """ -def sanitize(filename): +def sanitize(filename: str) -> str: """Sanitize a filename and replace whitespaces with underscores.""" return re.sub(r"\s+", "_", sanitize_filename(filename)) @@ -1253,20 +1227,12 @@ def sanitize(filename): ) -def schema(config_dict): +def schema(config_dict: dict) -> dict: """Validate a participant-analysis pipeline configuration. Validate against the latest validation schema by first applying backwards- compatibility patches, then applying Voluptuous validation, then handling complex - configuration interaction checks before returning validated config_dict. - - Parameters - ---------- - config_dict : dict - - Returns - ------- - dict + configuration interaction checks before returning validated `config_dict`. """ from CPAC.utils.utils import _changes_1_8_0_to_1_8_1 diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index c228fc3640..07e0e6e5a4 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -1,154 +1,101 @@ -import os +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. 
+ +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Tests for C-PAC pipeline engine.""" + +from pathlib import Path import pytest from CPAC.pipeline.cpac_pipeline import ( build_anat_preproc_stack, build_workflow, - connect_pipeline, - initialize_nipype_wf, - load_cpac_pipe_config, -) -from CPAC.pipeline.engine import ( - ingress_pipeconfig_paths, - ingress_raw_anat_data, - ingress_raw_func_data, - initiate_rpool, - ResourcePool, ) +from CPAC.pipeline.engine import ResourcePool from CPAC.utils.bids_utils import create_cpac_data_config - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_func_raw_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) - - if "func" in sub_data_dct: - wf, rpool, diff, blip, fmap_rp_list = ingress_raw_func_data( - wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id - ) - - rpool.gather_pipes(wf, cfg, all=True) - - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_anat_raw_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) - - rpool = ingress_raw_anat_data( - wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id +from CPAC.utils.configuration import Configuration, Preconfiguration + + +def _set_up_test( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> tuple[Configuration, dict]: + """Set up `cfg` and `sub_data` for engine tests.""" + bids_dir = str(bids_examples / "ds051") + sub_data = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] + cfg = Preconfiguration(preconfig) + cfg.pipeline_setup["output_directory"]["path"] = str(tmp_path / "out") + cfg.pipeline_setup["working_directory"]["path"] = str(tmp_path / "work") + cfg.pipeline_setup["log_directory"]["path"] = str(tmp_path / "logs") + return cfg, sub_data + + +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_func_raw_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data` .""" + cfg, sub_data_dct = 
_set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) + rpool.gather_pipes(rpool.wf, cfg, all_types=True) + + +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_anat_raw_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_anat_data` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool( + cfg=cfg, + data_paths=sub_data_dct, ) + rpool.ingress_raw_anat_data() + rpool.gather_pipes(rpool.wf, cfg, all_types=True) + + +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_pipeconfig_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_pipeconfig_paths` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool( + cfg=cfg, + data_paths=sub_data_dct, + ) + rpool.gather_pipes(rpool.wf, cfg, all_types=True) - rpool.gather_pipes(wf, cfg, all=True) - - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_pipeconfig_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) - - rpool = ingress_pipeconfig_paths(cfg, rpool, sub_data_dct, unique_id) - - rpool.gather_pipes(wf, cfg, all=True) - - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_build_anat_preproc_stack(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - wf, rpool = initiate_rpool(wf, cfg, sub_data_dct) +@pytest.mark.parametrize("preconfig", ["anat-only"]) +def test_build_anat_preproc_stack( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_anat_preproc_stack` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) - wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks) - + wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks) rpool.gather_pipes(wf, cfg) - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_build_workflow(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - 
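The rewritten engine tests replace hard-coded local paths with pytest's built-in `tmp_path` fixture and a `preconfig` parametrization, so each case builds its output, working, and log directories under a throwaway directory. A runnable sketch of the same setup pattern; the plain dictionary below stands in for the `Configuration` object the real `_set_up_test` returns:

from pathlib import Path

import pytest


def _set_up_sketch(preconfig: str, tmp_path: Path) -> dict:
    """Stand-in for ``_set_up_test``: point every pipeline directory at ``tmp_path``."""
    return {
        "preconfig": preconfig,
        "output_directory": str(tmp_path / "out"),
        "working_directory": str(tmp_path / "work"),
        "log_directory": str(tmp_path / "logs"),
    }


@pytest.mark.parametrize("preconfig", ["default", "anat-only"])
def test_directories_are_isolated(preconfig: str, tmp_path: Path) -> None:
    """Each parametrized case gets its own ``tmp_path``, so runs cannot collide."""
    cfg = _set_up_sketch(preconfig, tmp_path)
    assert Path(cfg["output_directory"]).parent == tmp_path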
cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - wf, rpool = initiate_rpool(wf, cfg, sub_data_dct) - - wf, _, _ = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) +@pytest.mark.parametrize("preconfig", ["default"]) +def test_build_workflow(bids_examples: Path, preconfig: str, tmp_path: Path) -> None: + """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_workflow` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) + wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) rpool.gather_pipes(wf, cfg) - - wf.run() - - -# bids_dir = "/Users/steven.giavasis/data/HBN-SI_dataset/rawdata" -# test_dir = "/test_dir" - -# cfg = "/Users/hecheng.jin/GitHub/DevBranch/CPAC/resources/configs/pipeline_config_monkey-ABCD.yml" -cfg = "/Users/hecheng.jin/GitHub/pipeline_config_monkey-ABCDlocal.yml" -bids_dir = "/Users/hecheng.jin/Monkey/monkey_data_oxford/site-ucdavis" -test_dir = "/Users/hecheng.jin/GitHub/Test/T2preproc" - -# test_ingress_func_raw_data(cfg, bids_dir, test_dir) -# test_ingress_anat_raw_data(cfg, bids_dir, test_dir) -# test_ingress_pipeconfig_data(cfg, bids_dir, test_dir) -# test_build_anat_preproc_stack(cfg, bids_dir, test_dir) -if __name__ == "__main__": - test_build_workflow(cfg, bids_dir, test_dir) diff --git a/CPAC/pipeline/utils.py b/CPAC/pipeline/utils.py index 39acb6429f..6f6953fef2 100644 --- a/CPAC/pipeline/utils.py +++ b/CPAC/pipeline/utils.py @@ -18,31 +18,16 @@ from itertools import chain -from CPAC.func_preproc.func_motion import motion_estimate_filter from CPAC.utils.bids_utils import insert_entity +from CPAC.utils.configuration.configuration import Configuration -MOVEMENT_FILTER_KEYS = motion_estimate_filter.outputs +def name_fork( + resource_idx: str, cfg: Configuration, json_info: dict, out_dct: dict +) -> tuple[str, dict]: + """Create and insert entities for forkpoints.""" + from CPAC.func_preproc.func_motion import motion_estimate_filter -def name_fork(resource_idx, cfg, json_info, out_dct): - """Create and insert entities for forkpoints. - - Parameters - ---------- - resource_idx : str - - cfg : CPAC.utils.configuration.Configuration - - json_info : dict - - out_dct : dict - - Returns - ------- - resource_idx : str - - out_dct : dict - """ if cfg.switch_is_on( [ "functional_preproc", @@ -54,7 +39,7 @@ def name_fork(resource_idx, cfg, json_info, out_dct): filt_value = None _motion_variant = { _key: json_info["CpacVariant"][_key] - for _key in MOVEMENT_FILTER_KEYS + for _key in motion_estimate_filter.outputs if _key in json_info.get("CpacVariant", {}) } if "unfiltered-" in resource_idx: @@ -105,12 +90,6 @@ def present_outputs(outputs: dict, keys: list) -> dict: NodeBlocks that differ only by configuration options and relevant output keys. 
- Parameters - ---------- - outputs : dict - - keys : list of str - Returns ------- dict diff --git a/CPAC/qc/pipeline.py b/CPAC/qc/pipeline.py index 15d6b35e09..fd39ed5193 100644 --- a/CPAC/qc/pipeline.py +++ b/CPAC/qc/pipeline.py @@ -1,7 +1,7 @@ import pkg_resources as p from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.qc.qc import ( afni_Edge3, create_montage, diff --git a/CPAC/qc/xcp.py b/CPAC/qc/xcp.py index 95cb870430..61bb008a0e 100644 --- a/CPAC/qc/xcp.py +++ b/CPAC/qc/xcp.py @@ -67,13 +67,15 @@ import pandas as pd import nibabel as nib from nipype.interfaces import afni, fsl +from nipype.pipeline.engine import Node, Workflow from CPAC.generate_motion_statistics.generate_motion_statistics import ( DVARS_strip_t0, ImageTo1D, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.pipeline.engine.resource import StratPool from CPAC.qc.qcmetrics import regisQ from CPAC.utils.interfaces.function import Function @@ -85,33 +87,29 @@ ] -def _connect_motion(wf, nodes, strat_pool, qc_file, pipe_num): +def _connect_motion( + wf: Workflow, nodes: dict, strat_pool: StratPool, qc_file: Node, pipe_num: int +) -> Workflow: """ Connect the motion metrics to the workflow. Parameters ---------- - wf : nipype.pipeline.engine.Workflow + wf The workflow to connect the motion metrics to. - nodes : dict + nodes Dictionary of nodes already collected from the strategy pool. - strat_pool : CPAC.pipeline.engine.ResourcePool + strat_pool The current strategy pool. - qc_file : nipype.pipeline.engine.Node - A function node with the function ``generate_xcp_qc``. - - pipe_num : int - - Returns - ------- - wf : nipype.pipeline.engine.Workflow + qc_file + A function node with the function :py:func:`generate_xcp_qc` . 
""" # pylint: disable=invalid-name, too-many-arguments try: - nodes = {**nodes, "censor-indices": strat_pool.node_data("censor-indices")} + nodes = {**nodes, "censor-indices": strat_pool.get_data("censor-indices")} wf.connect( nodes["censor-indices"].node, nodes["censor-indices"].out, @@ -501,7 +499,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): ) qc_file.inputs.desc = "preproc" qc_file.inputs.regressors = ( - strat_pool.node_data("regressors") + strat_pool.get_data("regressors") .node.name.split("regressors_")[-1][::-1] .split("_", 1)[-1][::-1] ) @@ -511,7 +509,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): op_string="-bin ", ) nodes = { - key: strat_pool.node_data(key) + key: strat_pool.get_data(key) for key in [ "bold", "desc-preproc_bold", @@ -526,13 +524,13 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): ] if strat_pool.check_rpool(key) } - nodes["bold2template_mask"] = strat_pool.node_data( + nodes["bold2template_mask"] = strat_pool.get_data( ["space-template_desc-bold_mask", "space-EPItemplate_desc-bold_mask"] ) - nodes["template_mask"] = strat_pool.node_data( + nodes["template_mask"] = strat_pool.get_data( ["T1w-brain-template-mask", "EPI-template-mask"] ) - nodes["template"] = strat_pool.node_data( + nodes["template"] = strat_pool.get_data( ["T1w-brain-template-funcreg", "EPI-brain-template-funcreg"] ) resample_bold_mask_to_template = pe.Node( diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index da63e694e4..3673b267cf 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -17,7 +17,7 @@ # pylint: disable=too-many-lines,ungrouped-imports,wrong-import-order """Workflows for registration.""" -from typing import Optional +from typing import Optional, TYPE_CHECKING from voluptuous import RequiredFieldInvalid from nipype.interfaces import afni, ants, c3, fsl, utility as util @@ -26,7 +26,7 @@ from CPAC.anat_preproc.lesion_preproc import create_lesion_preproc from CPAC.func_preproc.utils import chunk_ts, split_ts_chunks from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.utils import ( change_itk_transform_type, check_transforms, @@ -39,10 +39,14 @@ seperate_warps_list, single_ants_xfm_to_list, ) +from CPAC.utils.configuration.configuration import Configuration from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge from CPAC.utils.utils import check_prov_for_motion_tool, check_prov_for_regtool +if TYPE_CHECKING: + from CPAC.pipeline.engine.resource import StratPool + def apply_transform( wf_name, @@ -2616,7 +2620,7 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): node, out = connect wf.connect(node, out, ants_rc, "inputspec.input_brain") - t1w_brain_template = strat_pool.node_data("T1w-brain-template") + t1w_brain_template = strat_pool.get_data("T1w-brain-template") wf.connect( t1w_brain_template.node, t1w_brain_template.out, @@ -2635,10 +2639,10 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): ) wf.connect(node, out, ants_rc, "inputspec.input_head") - t1w_template = strat_pool.node_data("T1w-template") + t1w_template = strat_pool.get_data("T1w-template") wf.connect(t1w_template.node, t1w_template.out, ants_rc, "inputspec.reference_head") - brain_mask = strat_pool.node_data( + brain_mask = strat_pool.get_data( [ "space-T1w_desc-brain_mask", 
"space-longitudinal_desc-brain_mask", @@ -5416,8 +5420,8 @@ def warp_tissuemask_to_template(wf, cfg, strat_pool, pipe_num, xfm, template_spa def warp_resource_to_template( wf: pe.Workflow, - cfg, - strat_pool, + cfg: Configuration, + strat_pool: "StratPool", pipe_num: int, input_resource: list[str] | str, xfm: str, @@ -5428,24 +5432,24 @@ def warp_resource_to_template( Parameters ---------- - wf : pe.Workflow + wf - cfg : CPAC.utils.configuration.Configuration + cfg - strat_pool : CPAC.pipeline.engine.ResourcePool + strat_pool - pipe_num : int + pipe_num - input_resource : str or list + input_resource key for the resource to warp to template - xfm : str + xfm key for the transform to apply - reference : str, optional + reference key for reference if not using f'{template_space}-template' - time_series : boolean, optional + time_series resource to transform is 4D? Returns diff --git a/CPAC/reho/reho.py b/CPAC/reho/reho.py index 870d3fa36d..b19ad9ecc7 100644 --- a/CPAC/reho/reho.py +++ b/CPAC/reho/reho.py @@ -18,7 +18,7 @@ import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.reho.utils import * from CPAC.utils.interfaces import Function diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py index 13a4f72745..0c70370f7f 100644 --- a/CPAC/resources/tests/test_templates.py +++ b/CPAC/resources/tests/test_templates.py @@ -16,27 +16,37 @@ # License along with C-PAC. If not, see . """Tests for packaged templates.""" +from importlib.util import find_spec import os import pytest from CPAC.pipeline import ALL_PIPELINE_CONFIGS -from CPAC.pipeline.engine import ingress_pipeconfig_paths, ResourcePool +from CPAC.pipeline.engine import ResourcePool from CPAC.utils.configuration import Preconfiguration from CPAC.utils.datasource import get_highest_local_res -@pytest.mark.parametrize("pipeline", ALL_PIPELINE_CONFIGS) +@pytest.mark.parametrize( + "pipeline", + [ + pytest.param( + config, + marks=pytest.mark.skipif( + not find_spec("torch"), reason="torch required for NHP configs." + ), + ) + if config in ["monkey", "nhp-macaque"] + else config + for config in ALL_PIPELINE_CONFIGS + ], +) def test_packaged_path_exists(pipeline): - """ - Check that all local templates are included in image at at - least one resolution. 
- """ - rpool = ingress_pipeconfig_paths( - Preconfiguration(pipeline), ResourcePool(), "pytest" - ) + """Check that all local templates are included in at least one resolution.""" + rpool = ResourcePool(cfg=Preconfiguration(pipeline), part_id="pytest") + rpool.ingress_pipeconfig_paths() for resource in rpool.rpool.values(): - node = next(iter(resource.values())).get("data")[0] + node = next(iter(resource.values())).data[0] if hasattr(node.inputs, "template") and not node.inputs.template.startswith( "s3:" ): diff --git a/CPAC/sca/sca.py b/CPAC/sca/sca.py index d12aae7de9..bf855d578a 100644 --- a/CPAC/sca/sca.py +++ b/CPAC/sca/sca.py @@ -18,7 +18,7 @@ from nipype.interfaces.afni import preprocess from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.sca.utils import * from CPAC.timeseries.timeseries_analysis import ( get_roi_timeseries, diff --git a/CPAC/seg_preproc/seg_preproc.py b/CPAC/seg_preproc/seg_preproc.py index f769cf14b3..1fe3f4045f 100644 --- a/CPAC/seg_preproc/seg_preproc.py +++ b/CPAC/seg_preproc/seg_preproc.py @@ -19,7 +19,7 @@ from CPAC.anat_preproc.utils import mri_convert from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.registration.utils import check_transforms, generate_inverse_transform_flags from CPAC.seg_preproc.utils import ( diff --git a/CPAC/surface/surf_preproc.py b/CPAC/surface/surf_preproc.py index 1defe4e2d1..7959688f86 100644 --- a/CPAC/surface/surf_preproc.py +++ b/CPAC/surface/surf_preproc.py @@ -17,7 +17,7 @@ import os from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.surface.PostFreeSurfer.surf_reho import run_surf_reho from CPAC.utils.interfaces import Function diff --git a/CPAC/timeseries/timeseries_analysis.py b/CPAC/timeseries/timeseries_analysis.py index a56bc33c74..18b1a4851a 100644 --- a/CPAC/timeseries/timeseries_analysis.py +++ b/CPAC/timeseries/timeseries_analysis.py @@ -22,7 +22,7 @@ get_connectome_method, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils.datasource import ( create_roi_mask_dataflow, create_spatial_map_dataflow, diff --git a/CPAC/utils/bids_utils.py b/CPAC/utils/bids_utils.py index 34e72d430e..08e6edb989 100755 --- a/CPAC/utils/bids_utils.py +++ b/CPAC/utils/bids_utils.py @@ -14,6 +14,9 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
+"""Utilities for using BIDS data.""" + +from base64 import b64decode import json import os import re @@ -91,8 +94,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) elif not f_dict["scantype"]: msg = ( f"Filename ({fname}) does not appear to contain" @@ -100,8 +102,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) elif "bold" in f_dict["scantype"] and not f_dict["task"]: msg = ( f"Filename ({fname}) is a BOLD file, but doesn't contain a task, does" @@ -109,15 +110,13 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) return f_dict def bids_entities_from_filename(filename): - """Function to collect a list of BIDS entities from a given - filename. + """Collect a list of BIDS entities from a given filename. Parameters ---------- @@ -142,7 +141,7 @@ def bids_entities_from_filename(filename): def bids_match_entities(file_list, entities, suffix): - """Function to subset a list of filepaths by a passed BIDS entity. + """Subset a list of filepaths by a passed BIDS entity. Parameters ---------- @@ -250,10 +249,9 @@ def bids_remove_entity(name, key): def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): - """ + """Retrieve BIDS parameters for BIDS file corresponding to f_dict. - Retrieve the BIDS parameters from bids_config_dict for BIDS file - corresponding to f_dict. If an exact match for f_dict is not found + If an exact match for f_dict is not found the nearest match is returned, corresponding to the BIDS inheritance principle. @@ -316,12 +314,10 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): return params -def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): - # type: (dict, bool) -> dict - """ - Uses the BIDS principle of inheritance to build a data structure that - maps parameters in side car .json files to components in the names of - corresponding nifti files. +def bids_parse_sidecar( + config_dict: dict, dbg: bool = False, raise_error: bool = True +) -> dict: + """Use BIDS inheritance to map parameters in sidecar to corresponding NIfTI files. :param config_dict: dictionary that maps paths of sidecar json files (the key) to a dictionary containing the contents of the files (the values) @@ -428,9 +424,9 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): def bids_shortest_entity(file_list): - """Function to return the single file with the shortest chain of - BIDS entities from a given list, returning the first if more than - one have the same minimum length. + """Return the single file with the shortest chain of BIDS entities from a list. + + Return the first if more than one have the same minimum length. Parameters ---------- @@ -553,9 +549,7 @@ def bids_gen_cpac_sublist( raise_error=True, only_one_anat=True, ): - """ - Generates a CPAC formatted subject list from information contained in a - BIDS formatted set of data. + """Generate a CPAC formatted subject list from a BIDS dataset. Parameters ---------- @@ -910,8 +904,9 @@ def camelCase(string: str) -> str: # pylint: disable=invalid-name def combine_multiple_entity_instances(bids_str: str) -> str: - """Combines mutliple instances of a key in a BIDS string to a single - instance by camelCasing and concatenating the values. 
+ """Combine mutliple instances of a key in a BIDS string to a single instance. + + camelCase and concatenate the values. Parameters ---------- @@ -950,8 +945,7 @@ def combine_multiple_entity_instances(bids_str: str) -> str: def insert_entity(resource, key, value): - """Insert a `f'{key}-{value}'` BIDS entity before `desc-` if - present or before the suffix otherwise. + """Insert a BIDS entity before `desc-` if present or before the suffix otherwise. Parameters ---------- @@ -983,7 +977,8 @@ def insert_entity(resource, key, value): return "_".join([*new_entities[0], f"{key}-{value}", *new_entities[1], suff]) -def load_yaml_config(config_filename, aws_input_creds): +def load_yaml_config(config_filename: str, aws_input_creds: str) -> dict | list: + """Load a YAML configuration file, locally or from AWS.""" if config_filename.lower().startswith("data:"): try: header, encoded = config_filename.split(",", 1) @@ -1020,8 +1015,7 @@ def load_yaml_config(config_filename, aws_input_creds): def cl_strip_brackets(arg_list): - """Removes '[' from before first and ']' from after final - arguments in a list of commandline arguments. + """Remove '[' from before first and ']' from after final arguments. Parameters ---------- @@ -1051,7 +1045,7 @@ def create_cpac_data_config( aws_input_creds=None, skip_bids_validator=False, only_one_anat=True, -): +) -> list[dict]: """ Create a C-PAC data config YAML file from a BIDS directory. @@ -1111,8 +1105,7 @@ def create_cpac_data_config( def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds): - """ - Loads the file as a check to make sure it is available and readable. + """Load the file to make sure it is available and readable. Parameters ---------- @@ -1210,8 +1203,7 @@ def res_in_filename(cfg, label): def sub_list_filter_by_labels(sub_list, labels): - """Function to filter a sub_list by provided BIDS labels for - specified suffixes. + """Filter a sub_list by provided BIDS labels for specified suffixes. Parameters ---------- @@ -1287,7 +1279,7 @@ def without_key(entity: str, key: str) -> str: def _t1w_filter(anat, shortest_entity, label): - """Helper function to filter T1w paths. + """Filter T1w paths. Parameters ---------- @@ -1318,7 +1310,7 @@ def _t1w_filter(anat, shortest_entity, label): def _sub_anat_filter(anat, shortest_entity, label): - """Helper function to filter anat paths in sub_list. + """Filter anat paths in sub_list. Parameters ---------- @@ -1341,7 +1333,7 @@ def _sub_anat_filter(anat, shortest_entity, label): def _sub_list_filter_by_label(sub_list, label_type, label): - """Function to filter a sub_list by a CLI-provided label. + """Filter a sub_list by a CLI-provided label. Parameters ---------- @@ -1410,7 +1402,7 @@ def _sub_list_filter_by_label(sub_list, label_type, label): def _match_functional_scan(sub_list_func_dict, scan_file_to_match): - """Function to subset a scan from a sub_list_func_dict by a scan filename. + """Subset a scan from a sub_list_func_dict by a scan filename. 
Parameters ---------- diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py index 008e674c2d..8eba26bf21 100644 --- a/CPAC/utils/datasource.py +++ b/CPAC/utils/datasource.py @@ -20,6 +20,7 @@ import json from pathlib import Path import re +from typing import Optional from voluptuous import RequiredFieldInvalid from nipype.interfaces import utility as util @@ -30,7 +31,6 @@ from CPAC.utils.bids_utils import bids_remove_entity from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import FMLOGGER -from CPAC.utils.utils import get_scan_params def bidsier_prefix(unique_id): @@ -64,7 +64,8 @@ def bidsier_prefix(unique_id): return "_".join(components) -def get_rest(scan, rest_dict, resource="scan"): +@Function.sig_imports(["from pathlib import Path"]) +def get_rest(scan: str, rest_dict: dict, resource: str = "scan") -> Path | str: """Return the path of the chosen resource in the functional file dictionary. scan: the scan/series name or label @@ -127,7 +128,7 @@ def select_model_files(model, ftest, model_name): return fts_file, con_file, grp_file, mat_file -def check_func_scan(func_scan_dct, scan): +def check_func_scan(func_scan_dct: dict, scan: str) -> None: """Run some checks on the functional timeseries-related files. For a given series/scan name or label. @@ -168,119 +169,6 @@ def check_func_scan(func_scan_dct, scan): raise ValueError(msg) -def create_func_datasource(rest_dict, rpool, wf_name="func_datasource"): - """Return the functional timeseries-related file paths for each series/scan... - - ...from the dictionary of functional files described in the data - configuration (sublist) YAML file. - - Scan input (from inputnode) is an iterable. - """ - import nipype.interfaces.utility as util - - from CPAC.pipeline import nipype_pipeline_engine as pe - - wf = pe.Workflow(name=wf_name) - - inputnode = pe.Node( - util.IdentityInterface( - fields=["subject", "scan", "creds_path", "dl_dir"], mandatory_inputs=True - ), - name="inputnode", - ) - - outputnode = pe.Node( - util.IdentityInterface( - fields=["subject", "rest", "scan", "scan_params", "phase_diff", "magnitude"] - ), - name="outputspec", - ) - - # have this here for now because of the big change in the data - # configuration format - # (Not necessary with ingress - format does not comply) - if not rpool.check_rpool("derivatives-dir"): - check_scan = pe.Node( - function.Function( - input_names=["func_scan_dct", "scan"], - output_names=[], - function=check_func_scan, - as_module=True, - ), - name="check_func_scan", - ) - - check_scan.inputs.func_scan_dct = rest_dict - wf.connect(inputnode, "scan", check_scan, "scan") - - # get the functional scan itself - selectrest = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="selectrest", - ) - selectrest.inputs.rest_dict = rest_dict - selectrest.inputs.resource = "scan" - wf.connect(inputnode, "scan", selectrest, "scan") - - # check to see if it's on an Amazon AWS S3 bucket, and download it, if it - # is - otherwise, just return the local file path - check_s3_node = pe.Node( - function.Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="check_for_s3", - ) - - wf.connect(selectrest, "file_path", check_s3_node, "file_path") - wf.connect(inputnode, "creds_path", check_s3_node, "creds_path") - wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") - 
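`get_rest` above is now decorated with `Function.sig_imports([...])`. Per the `Function` docstring later in this diff, such import lists exist so the function can execute in an otherwise empty namespace; the decorator's implementation is not shown here, so the following is only a standard-library sketch of why a function-node callable must carry its imports in its body (or have them supplied alongside its source):

import inspect


def needs_pathlib(p: str) -> str:
    """Return the basename of ``p``; carries its own import like a node callable."""
    # The import lives inside the body: module-level imports of the defining
    # module are not visible when only this function's source is re-executed.
    from pathlib import Path

    return Path(p).name


# Simulate what a function node does: re-run the source in a fresh namespace.
namespace: dict = {}
exec(inspect.getsource(needs_pathlib), namespace)  # noqa: S102
print(namespace["needs_pathlib"]("/tmp/sub-01_T1w.nii.gz"))  # sub-01_T1w.nii.gz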
check_s3_node.inputs.img_type = "func" - - wf.connect(inputnode, "subject", outputnode, "subject") - wf.connect(check_s3_node, "local_path", outputnode, "rest") - wf.connect(inputnode, "scan", outputnode, "scan") - - # scan parameters CSV - select_scan_params = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="select_scan_params", - ) - select_scan_params.inputs.rest_dict = rest_dict - select_scan_params.inputs.resource = "scan_parameters" - wf.connect(inputnode, "scan", select_scan_params, "scan") - - # if the scan parameters file is on AWS S3, download it - s3_scan_params = pe.Node( - function.Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="s3_scan_params", - ) - - wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path") - wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") - wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") - wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") - - return wf - - def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): """Return the field map files... @@ -374,7 +262,7 @@ def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): return wf -def get_fmap_phasediff_metadata(data_config_scan_params): +def get_fmap_phasediff_metadata(data_config_scan_params: dict | str): """Return the scan parameters for a field map phasediff scan.""" if ( not isinstance(data_config_scan_params, dict) @@ -513,298 +401,6 @@ def match_epi_fmaps( return (opposite_pe_epi, same_pe_epi) -def ingress_func_metadata( - wf, - cfg, - rpool, - sub_dict, - subject_id, - input_creds_path, - unique_id=None, - num_strat=None, -): - """Ingress metadata for functional scans.""" - name_suffix = "" - for suffix_part in (unique_id, num_strat): - if suffix_part is not None: - name_suffix += f"_{suffix_part}" - # Grab field maps - diff = False - blip = False - fmap_rp_list = [] - fmap_TE_list = [] - if "fmap" in sub_dict: - second = False - for orig_key in sub_dict["fmap"]: - gather_fmap = create_fmap_datasource( - sub_dict["fmap"], f"fmap_gather_{orig_key}_{subject_id}" - ) - gather_fmap.inputs.inputnode.set( - subject=subject_id, - creds_path=input_creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - gather_fmap.inputs.inputnode.scan = orig_key - - key = orig_key - if "epi" in key and not second: - key = "epi-1" - second = True - elif "epi" in key and second: - key = "epi-2" - - rpool.set_data(key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress") - rpool.set_data( - f"{key}-scan-params", - gather_fmap, - "outputspec.scan_params", - {}, - "", - "fmap_params_ingress", - ) - - fmap_rp_list.append(key) - - get_fmap_metadata_imports = ["import json"] - get_fmap_metadata = pe.Node( - Function( - input_names=["data_config_scan_params"], - output_names=[ - "dwell_time", - "pe_direction", - "total_readout", - "echo_time", - "echo_time_one", - "echo_time_two", - ], - function=get_fmap_phasediff_metadata, - imports=get_fmap_metadata_imports, - ), - name=f"{key}_get_metadata{name_suffix}", - ) - - wf.connect( - gather_fmap, - "outputspec.scan_params", - get_fmap_metadata, - "data_config_scan_params", - ) - - if "phase" in key: - # leave it open to all three options, in case there is a - # phasediff image with either a single EchoTime field (which - # usually matches one of the magnitude 
EchoTimes), OR - # a phasediff with an EchoTime1 and EchoTime2 - - # at least one of these rpool keys will have a None value, - # which will be sorted out in gather_echo_times below - rpool.set_data( - f"{key}-TE", - get_fmap_metadata, - "echo_time", - {}, - "", - "fmap_TE_ingress", - ) - fmap_TE_list.append(f"{key}-TE") - - rpool.set_data( - f"{key}-TE1", - get_fmap_metadata, - "echo_time_one", - {}, - "", - "fmap_TE1_ingress", - ) - fmap_TE_list.append(f"{key}-TE1") - - rpool.set_data( - f"{key}-TE2", - get_fmap_metadata, - "echo_time_two", - {}, - "", - "fmap_TE2_ingress", - ) - fmap_TE_list.append(f"{key}-TE2") - - elif "magnitude" in key: - rpool.set_data( - f"{key}-TE", - get_fmap_metadata, - "echo_time", - {}, - "", - "fmap_TE_ingress", - ) - fmap_TE_list.append(f"{key}-TE") - - rpool.set_data( - f"{key}-dwell", - get_fmap_metadata, - "dwell_time", - {}, - "", - "fmap_dwell_ingress", - ) - rpool.set_data( - f"{key}-pedir", - get_fmap_metadata, - "pe_direction", - {}, - "", - "fmap_pedir_ingress", - ) - rpool.set_data( - f"{key}-total-readout", - get_fmap_metadata, - "total_readout", - {}, - "", - "fmap_readout_ingress", - ) - - if "phase" in key or "mag" in key: - diff = True - - if re.match("epi_[AP]{2}", orig_key): - blip = True - - if diff: - calc_delta_ratio = pe.Node( - Function( - input_names=["effective_echo_spacing", "echo_times"], - output_names=["deltaTE", "ees_asym_ratio"], - function=calc_delta_te_and_asym_ratio, - imports=["from typing import Optional"], - ), - name=f"diff_distcor_calc_delta{name_suffix}", - ) - - gather_echoes = pe.Node( - Function( - input_names=[ - "echotime_1", - "echotime_2", - "echotime_3", - "echotime_4", - ], - output_names=["echotime_list"], - function=gather_echo_times, - ), - name="fugue_gather_echo_times", - ) - - for idx, fmap_file in enumerate(fmap_TE_list, start=1): - try: - node, out_file = rpool.get(fmap_file)[ - f"['{fmap_file}:fmap_TE_ingress']" - ]["data"] - wf.connect(node, out_file, gather_echoes, f"echotime_{idx}") - except KeyError: - pass - - wf.connect(gather_echoes, "echotime_list", calc_delta_ratio, "echo_times") - - # Add in nodes to get parameters from configuration file - # a node which checks if scan_parameters are present for each scan - scan_params = pe.Node( - Function( - input_names=[ - "data_config_scan_params", - "subject_id", - "scan", - "pipeconfig_tr", - "pipeconfig_tpattern", - "pipeconfig_start_indx", - "pipeconfig_stop_indx", - ], - output_names=[ - "tr", - "tpattern", - "template", - "ref_slice", - "start_indx", - "stop_indx", - "pe_direction", - "effective_echo_spacing", - ], - function=get_scan_params, - ), - name=f"bold_scan_params_{subject_id}{name_suffix}", - ) - scan_params.inputs.subject_id = subject_id - scan_params.inputs.set( - pipeconfig_start_indx=cfg.functional_preproc["truncation"]["start_tr"], - pipeconfig_stop_indx=cfg.functional_preproc["truncation"]["stop_tr"], - ) - - node, out = rpool.get("scan")["['scan:func_ingress']"]["data"] - wf.connect(node, out, scan_params, "scan") - - # Workaround for extracting metadata with ingress - if rpool.check_rpool("derivatives-dir"): - selectrest_json = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="selectrest_json", - ) - selectrest_json.inputs.rest_dict = sub_dict - selectrest_json.inputs.resource = "scan_parameters" - wf.connect(node, out, selectrest_json, "scan") - wf.connect(selectrest_json, "file_path", scan_params, 
"data_config_scan_params") - - else: - # wire in the scan parameter workflow - node, out = rpool.get("scan-params")["['scan-params:scan_params_ingress']"][ - "data" - ] - wf.connect(node, out, scan_params, "data_config_scan_params") - - rpool.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") - rpool.set_data("tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress") - rpool.set_data("template", scan_params, "template", {}, "", "func_metadata_ingress") - rpool.set_data( - "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress" - ) - rpool.set_data("stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress") - rpool.set_data( - "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress" - ) - - if diff: - # Connect EffectiveEchoSpacing from functional metadata - rpool.set_data( - "effectiveEchoSpacing", - scan_params, - "effective_echo_spacing", - {}, - "", - "func_metadata_ingress", - ) - node, out_file = rpool.get("effectiveEchoSpacing")[ - "['effectiveEchoSpacing:func_metadata_ingress']" - ]["data"] - wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing") - rpool.set_data( - "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress" - ) - rpool.set_data( - "ees-asym-ratio", - calc_delta_ratio, - "ees_asym_ratio", - {}, - "", - "ees_asym_ratio_ingress", - ) - - return wf, rpool, diff, blip, fmap_rp_list - - def create_general_datasource(wf_name): """Create a general-purpose datasource node.""" import nipype.interfaces.utility as util @@ -880,9 +476,16 @@ def create_check_for_s3_node( return check_s3_node +@function.Function.sig_imports( + ["from pathlib import Path", "from typing import Optional"] +) def check_for_s3( - file_path, creds_path=None, dl_dir=None, img_type="other", verbose=False -): + file_path: Path | str, + creds_path: Optional[Path | str] = None, + dl_dir: Optional[Path | str] = None, + img_type: str = "other", + verbose: bool = False, +) -> Path | str: """Check if passed-in file is on S3.""" # Import packages import os diff --git a/CPAC/utils/interfaces/function/function.py b/CPAC/utils/interfaces/function/function.py index 34d01373d5..2df6741717 100644 --- a/CPAC/utils/interfaces/function/function.py +++ b/CPAC/utils/interfaces/function/function.py @@ -156,28 +156,28 @@ class Function(NipypeFunction): def __init__( self, - input_names=None, - output_names="out", - function=None, - imports=None, - as_module=False, + input_names: Optional[str | list[str]] = None, + output_names: Optional[str | list[str]] = "out", + function: Optional[Callable] = None, + imports: Optional[list[str]] = None, + as_module: bool = False, **inputs, ): - """Initialize a :py:func`~CPAC.utils.interfaces.function.Function` interface. + """Initialize a :py:func:`~CPAC.utils.interfaces.function.Function` interface. Parameters ---------- - input_names : single str or list or None + input_names names corresponding to function inputs if ``None``, derive input names from function argument names - output_names : single str or list + output_names names corresponding to function outputs (default: 'out'). if list of length > 1, has to match the number of outputs - function : callable + function callable python object. must be able to execute in an - isolated namespace (possibly in concert with the ``imports`` + isolated namespace (possibly in concert with the `imports` parameter) - imports : list of strings + imports list of import statements that allow the function to execute in an otherwise empty namespace. 
If these collide with imports defined via the :py:meth:`Function.sig_imports` @@ -244,12 +244,11 @@ def sig_imports(imports: list[str]) -> Callable: Parameters ---------- - imports : list of str + imports import statements to import the function in an otherwise empty namespace. If these collide with imports defined via the - :py:meth:`Function.__init__` initialization method, the - imports given as a parameter here will be overridden by - those from the initializer. + :py:meth:`Function.__init__` method, the imports given as a parameter here + will be overridden by those from the initializer. Returns ------- diff --git a/CPAC/utils/strategy.py b/CPAC/utils/strategy.py index 67f4de5770..42d6848e9c 100644 --- a/CPAC/utils/strategy.py +++ b/CPAC/utils/strategy.py @@ -21,7 +21,7 @@ class Strategy: def __init__(self): - self._resource_pool = ResourcePool({}) + self._resource_pool = ResourcePool() self.leaf_node = None self.leaf_out_file = None self.name = [] @@ -29,9 +29,6 @@ def __init__(self): def append_name(self, name): self.name.append(name) - def get_name(self): - return self.name - def set_leaf_properties(self, node, out_file): self.leaf_node = node self.leaf_out_file = out_file @@ -57,7 +54,7 @@ def get_node_from_resource_pool(self, resource_key): @property def resource_pool(self): """Strategy's ResourcePool dict.""" - return self._resource_pool.get_entire_rpool() + return self._resource_pool.rpool @property def rpool(self): diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index ab896c6029..4d8f18dabe 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -7,7 +7,7 @@ import pytest from CPAC.func_preproc import get_motion_ref -from CPAC.pipeline.nodeblock import NodeBlockFunction +from CPAC.pipeline.engine.nodeblock import NodeBlockFunction from CPAC.utils.configuration import Configuration from CPAC.utils.monitoring.custom_logging import log_subprocess from CPAC.utils.tests import old_functions diff --git a/CPAC/utils/typing.py b/CPAC/utils/typing.py new file mode 100644 index 0000000000..79197dd314 --- /dev/null +++ b/CPAC/utils/typing.py @@ -0,0 +1,24 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Type aliases for C-PAC.""" + +from typing import ForwardRef + +LIST_OF_LIST_OF_STR = str | list[ForwardRef("LIST_OF_LIST_OF_STR")] +# _PIPE_IDX = list[ForwardRef("PIPE_IDX")] | str | tuple[ForwardRef("PIPE_IDX"), ...] 
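The new `CPAC/utils/typing.py` defines `LIST_OF_LIST_OF_STR` as a recursive alias (a string, or arbitrarily nested lists of strings). A small runnable sketch of how an alias of that shape reads at a call site; `NESTED_STR` below is a local stand-in with the same structure, not the C-PAC alias:

from typing import ForwardRef

# Same shape as LIST_OF_LIST_OF_STR: a str, or arbitrarily nested lists of str.
NESTED_STR = str | list[ForwardRef("NESTED_STR")]


def flatten(value: NESTED_STR) -> list[str]:
    """Flatten a ``NESTED_STR``-shaped value into a flat list of strings."""
    if isinstance(value, str):
        return [value]
    return [leaf for item in value for leaf in flatten(item)]


print(flatten(["a", ["b", ["c", "d"]], "e"]))  # ['a', 'b', 'c', 'd', 'e']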
+# PIPE_IDX = TypeVar("PIPE_IDX", bound=_PIPE_IDX) +PIPE_IDX = list[str | tuple] | str | tuple diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index b459262993..8e179411ae 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -138,7 +138,7 @@ def get_flag_wf(wf_name="get_flag"): wf.connect(input_node, "in_flag", get_flag, "in_flag") -def read_json(json_file): +def read_json(json_file: str) -> dict: """Read a JSON file and return the contents as a dictionary.""" try: with open(json_file, "r") as f: @@ -224,6 +224,7 @@ def create_id_string( return combine_multiple_entity_instances(res_in_filename(cfg, out_filename)) +@Function.sig_imports(["import os", "import json"]) def write_output_json(json_data, filename, indent=3, basedir=None): """Write a dictionary to a JSON file.""" if not basedir: diff --git a/CPAC/vmhc/vmhc.py b/CPAC/vmhc/vmhc.py index 3c547a8e2f..e09f156dfb 100644 --- a/CPAC/vmhc/vmhc.py +++ b/CPAC/vmhc/vmhc.py @@ -3,7 +3,7 @@ from CPAC.image_utils import spatial_smoothing from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.utils.utils import check_prov_for_regtool from CPAC.vmhc import * diff --git a/dev/circleci_data/conftest.py b/dev/circleci_data/conftest.py new file mode 100644 index 0000000000..4d67fdac05 --- /dev/null +++ b/dev/circleci_data/conftest.py @@ -0,0 +1,19 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Pytest configuration for CircleCI-specific tests.""" + +from CPAC.conftest import bids_examples diff --git a/dev/circleci_data/test_external_utils.py b/dev/circleci_data/test_external_utils.py index f516b0c903..d4892fee3b 100644 --- a/dev/circleci_data/test_external_utils.py +++ b/dev/circleci_data/test_external_utils.py @@ -25,9 +25,9 @@ import pytest import semver -CPAC_DIR = str(Path(__file__).parent.parent.parent) -sys.path.append(CPAC_DIR) -DATA_DIR = os.path.join(CPAC_DIR, "dev", "circleci_data") +CPAC_DIR = Path(__file__).parent.parent.parent +sys.path.append(str(CPAC_DIR)) +DATA_DIR = CPAC_DIR / "dev/circleci_data" from CPAC.__main__ import utils as CPAC_main_utils # noqa: E402 @@ -70,9 +70,8 @@ def test_build_data_config(caplog, cli_runner, multiword_connector): caplog.set_level(INFO) if multiword_connector == "-" and _BACKPORT_CLICK: return - os.chdir(DATA_DIR) - test_yaml = os.path.join(DATA_DIR, "data_settings.yml") - _delete_test_yaml(test_yaml) + os.chdir(str(DATA_DIR)) + test_yaml = DATA_DIR / "data_settings.yml" if multiword_connector == "_": data_config = CPAC_main_utils.commands[ _click_backport(CPAC_main_utils, "data-config") @@ -89,49 +88,50 @@ def test_build_data_config(caplog, cli_runner, multiword_connector): assert "\n".join(caplog.messages).startswith( "\nGenerated a default data_settings YAML file for editing" ) - assert os.path.exists(test_yaml) + assert test_yaml.exists() _delete_test_yaml(test_yaml) -def test_new_settings_template(caplog, cli_runner): +def test_new_settings_template(bids_examples, caplog, cli_runner): """Test CLI ``utils new-settings-template``.""" caplog.set_level(INFO) - os.chdir(CPAC_DIR) - - example_dir = os.path.join(CPAC_DIR, "bids-examples") - if not os.path.exists(example_dir): - from git import Repo - - Repo.clone_from( - "https://github.com/bids-standard/bids-examples.git", example_dir - ) + example_dir = Path(CPAC_DIR / "bids-examples") + if not example_dir.exists(): + example_dir.symlink_to(bids_examples) + os.chdir(str(CPAC_DIR)) result = cli_runner.invoke( CPAC_main_utils.commands[ _click_backport(CPAC_main_utils, "data-config") ].commands["build"], - [os.path.join(DATA_DIR, "data_settings_bids_examples_ds051_default_BIDS.yml")], + [str(DATA_DIR / "data_settings_bids_examples_ds051_default_BIDS.yml")], ) - participant_yaml = os.path.join(DATA_DIR, "data_config_ds051.yml") - group_yaml = os.path.join(DATA_DIR, "group_analysis_participants_ds051.txt") + participant_yaml = DATA_DIR / "data_config_ds051.yml" + group_yaml = DATA_DIR / "group_analysis_participants_ds051.txt" + + if example_dir.is_symlink() or example_dir.is_file(): + example_dir.unlink() + else: + from shutil import rmtree + rmtree(example_dir) assert result.exit_code == 0 assert "\n".join(caplog.messages).startswith( "\nGenerating data configuration file.." ) - assert os.path.exists(participant_yaml) - assert os.path.exists(group_yaml) + assert participant_yaml.exists() + assert group_yaml.exists() _delete_test_yaml(participant_yaml) _delete_test_yaml(group_yaml) def test_repickle(cli_runner): # noqa fn = "python_2_pickle.pkl" - pickle_path = os.path.join(DATA_DIR, fn) + pickle_path = str(DATA_DIR / fn) backups = [_Backup(pickle_path), _Backup(f"{pickle_path}z")] - result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR]) + result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)]) assert result.exit_code == 0 assert ( @@ -139,7 +139,7 @@ def test_repickle(cli_runner): # noqa "pickle." 
in result.output ) - result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR]) + result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)]) assert result.exit_code == 0 assert f"Pickle {fn} is a Python 3 pickle." in result.output @@ -157,9 +157,10 @@ def restore(self): w.write(self.data) -def _delete_test_yaml(test_yaml): - if os.path.exists(test_yaml): - os.remove(test_yaml) +def _delete_test_yaml(test_yaml: Path) -> None: + """Delete test YAML file.""" + if test_yaml.exists(): + os.remove(str(test_yaml)) def _test_repickle(pickle_path, gzipped=False): diff --git a/dev/circleci_data/test_in_image.sh b/dev/circleci_data/test_in_image.sh index b62de84994..9420d7c1ab 100755 --- a/dev/circleci_data/test_in_image.sh +++ b/dev/circleci_data/test_in_image.sh @@ -4,7 +4,7 @@ export PATH=$PATH:/home/$(whoami)/.local/bin pip install -r /code/dev/circleci_data/requirements.txt # run test with coverage as module -python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC +python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --capture=no --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC echo "$?" > test-results/exitcode
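`test_new_settings_template` no longer clones bids-examples with GitPython; it reuses the shared `bids_examples` fixture by symlinking it to the expected location and removes the link (or a pre-existing real directory) afterward. A runnable sketch of that symlink-and-clean-up pattern with `pathlib`; the paths below are placeholders:

from pathlib import Path
from shutil import rmtree
import tempfile


def expose_dataset(dataset: Path, expected_location: Path) -> None:
    """Make ``dataset`` visible at ``expected_location`` without copying it."""
    if not expected_location.exists():
        expected_location.symlink_to(dataset, target_is_directory=True)


def clean_up(expected_location: Path) -> None:
    """Remove a symlink or file; recursively remove a real directory."""
    if expected_location.is_symlink() or expected_location.is_file():
        expected_location.unlink()
    elif expected_location.is_dir():
        rmtree(expected_location)


with tempfile.TemporaryDirectory() as tmp:
    dataset = Path(tmp) / "shared-fixture-data"
    dataset.mkdir()
    link = Path(tmp) / "bids-examples"
    expose_dataset(dataset, link)
    assert link.exists()
    clean_up(link)
    assert not link.exists()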