diff --git a/.github/Dockerfiles/C-PAC.develop-ABCD-HCP-bionic.Dockerfile b/.github/Dockerfiles/C-PAC.develop-ABCD-HCP-bionic.Dockerfile index 2a5760ebb7..d62a0a02a2 100644 --- a/.github/Dockerfiles/C-PAC.develop-ABCD-HCP-bionic.Dockerfile +++ b/.github/Dockerfiles/C-PAC.develop-ABCD-HCP-bionic.Dockerfile @@ -1,3 +1,13 @@ +#!/bin/bash +# Copyright (C) 2022-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public License along with C-PAC. If not, see <https://www.gnu.org/licenses/>. FROM ghcr.io/fcp-indi/c-pac/stage-base:abcd-hcp-v1.8.7.dev1 LABEL org.opencontainers.image.description "Full C-PAC image with software dependencies version-matched to [ABCD-HCP BIDS fMRI Pipeline](https://github.com/DCAN-Labs/abcd-hcp-pipeline/blob/e480a8f99534f1b05f37bf44c64827384b69b383/Dockerfile)" LABEL org.opencontainers.image.source https://github.com/FCP-INDI/C-PAC @@ -12,9 +22,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - chmod +x /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run-with-freesurfer.sh"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + chmod +x /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run-with-freesurfer.sh"] # Link libraries for Singularity images RUN ldconfig \ diff --git a/.github/Dockerfiles/C-PAC.develop-fMRIPrep-LTS-xenial.Dockerfile b/.github/Dockerfiles/C-PAC.develop-fMRIPrep-LTS-xenial.Dockerfile index 0e9cd3d899..e63f278cb8 100644 --- a/.github/Dockerfiles/C-PAC.develop-fMRIPrep-LTS-xenial.Dockerfile +++ b/.github/Dockerfiles/C-PAC.develop-fMRIPrep-LTS-xenial.Dockerfile @@ -1,3 +1,13 @@ +#!/bin/bash +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public License along with C-PAC. If not, see <https://www.gnu.org/licenses/>. 
FROM ghcr.io/fcp-indi/c-pac/stage-base:fmriprep-lts-v1.8.7.dev1 LABEL org.opencontainers.image.description "Full C-PAC image with software dependencies version-matched to [fMRIPrep LTS](https://reproducibility.stanford.edu/fmriprep-lts#long-term-support-lts)" LABEL org.opencontainers.image.source https://github.com/FCP-INDI/C-PAC @@ -12,9 +22,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - chmod +x /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run-with-freesurfer.sh"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + chmod +x /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run-with-freesurfer.sh"] # link libraries & clean up RUN sed -i 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen \ diff --git a/.github/Dockerfiles/C-PAC.develop-jammy.Dockerfile b/.github/Dockerfiles/C-PAC.develop-jammy.Dockerfile index f9ced12d7b..57d737962c 100644 --- a/.github/Dockerfiles/C-PAC.develop-jammy.Dockerfile +++ b/.github/Dockerfiles/C-PAC.develop-jammy.Dockerfile @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -28,9 +28,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - chmod +x /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run-with-freesurfer.sh"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + chmod +x /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run-with-freesurfer.sh"] # link libraries & clean up # link libraries & clean up diff --git a/.github/Dockerfiles/C-PAC.develop-lite-jammy.Dockerfile b/.github/Dockerfiles/C-PAC.develop-lite-jammy.Dockerfile index 1f6f3a9ec9..98280b0b80 100644 --- a/.github/Dockerfiles/C-PAC.develop-lite-jammy.Dockerfile +++ b/.github/Dockerfiles/C-PAC.develop-lite-jammy.Dockerfile @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -29,9 +29,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - rm -Rf /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run.py"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + rm -Rf /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run.py"] # link libraries & clean up # link libraries & clean up diff --git a/.github/scripts/autoversioning.sh b/.github/scripts/autoversioning.sh index 64cd698626..8ea229066d 100755 --- a/.github/scripts/autoversioning.sh +++ b/.github/scripts/autoversioning.sh @@ -18,7 +18,14 @@ # License along with C-PAC. If not, see . # Update version comment strings -cd CPAC +function wait_for_git_lock() { + while [ -f "./.git/index.lock" ]; do + echo "Waiting for the git lock file to be removed..." + sleep 1 + done +} + +cd CPAC || exit 1 VERSION=$(python -c "from info import __version__; print(('.'.join(('.'.join(__version__[::-1].split('-')[1].split('.')[1:])[::-1], __version__.split('-')[1])) if '-' in __version__ else __version__).split('+', 1)[0])") cd .. 
echo "v${VERSION}" > version @@ -30,7 +37,7 @@ else # Linux and others find ./CPAC/resources/configs -name "*.yml" -exec sed -i'' -r "${_SED_COMMAND}" {} \; fi -git add version +wait_for_git_lock && git add version VERSIONS=( `git show $(git log --pretty=format:'%h' -n 2 version | tail -n 1):version` `cat version` ) export PATTERN="(declare|typeset) -a" if [[ "$(declare -p VERSIONS)" =~ $PATTERN ]] @@ -52,11 +59,15 @@ then done unset IFS fi -git add CPAC/resources/configs .github/Dockerfiles +wait_for_git_lock && git add CPAC/resources/configs .github/Dockerfiles # Overwrite top-level Dockerfiles with the CI Dockerfiles cp .github/Dockerfiles/C-PAC.develop-jammy.Dockerfile Dockerfile cp .github/Dockerfiles/C-PAC.develop-ABCD-HCP-bionic.Dockerfile variant-ABCD-HCP.Dockerfile cp .github/Dockerfiles/C-PAC.develop-fMRIPrep-LTS-xenial.Dockerfile variant-fMRIPrep-LTS.Dockerfile cp .github/Dockerfiles/C-PAC.develop-lite-jammy.Dockerfile variant-lite.Dockerfile -git add *Dockerfile +while [ -f "/Users/jon.clucas/CNL/GitHub/C-PAC/.git/index.lock" ]; do + echo "Waiting for the lock file to be removed..." + sleep 1 +done +wait_for_git_lock && git add *Dockerfile diff --git a/.github/scripts/get_package_id.py b/.github/scripts/get_package_id.py index 358838f9f4..e0903cc50b 100644 --- a/.github/scripts/get_package_id.py +++ b/.github/scripts/get_package_id.py @@ -1,3 +1,19 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . """Get Package ID. Script to get GHCR ID string for a given owner + image tag @@ -11,7 +27,7 @@ def get_packages(owner, tag, api_token=None): - """Function to collect GHCR packages for a given owner & tag. + """Collect GHCR packages for a given owner & tag. Parameters ---------- @@ -32,7 +48,7 @@ def get_packages(owner, tag, api_token=None): api_token = os.environ.get("GITHUB_TOKEN", "") def fetch(url): - """Method to make API call and return response, given a URL. + """Make API call and return response, given a URL. Parameters ---------- @@ -82,7 +98,7 @@ def fetch(url): def id_from_tag(owner, image, tag, api_token=None): - """Function to return a package ID given an image version tag. + """Return a package ID given an image version tag. 
Parameters ---------- @@ -106,12 +122,12 @@ def id_from_tag(owner, image, tag, api_token=None): ] if len(versions): return versions[0] - else: - raise LookupError(f"Image not found: ghcr.io/{owner}/{image}:{tag}") + msg = f"Image not found: ghcr.io/{owner}/{image}:{tag}" + raise LookupError(msg) if __name__ == "__main__": - if len(sys.argv) == 4: - pass + if len(sys.argv) == 4: # noqa: PLR2004 + print(id_from_tag(*sys.argv[1:])) # noqa: T201 else: - pass + print(__doc__) # noqa: T201 diff --git a/.github/scripts/get_pr_base_shas.py b/.github/scripts/get_pr_base_shas.py index ac20c6933f..1895352d63 100644 --- a/.github/scripts/get_pr_base_shas.py +++ b/.github/scripts/get_pr_base_shas.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -15,3 +15,18 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see <https://www.gnu.org/licenses/>. """Get base SHAs for open PRs.""" +import os + +from github import Github + +print( # noqa: T201 + " ".join( + [ + pr.base.sha + for pr in Github(os.environ.get("GITHUB_TOKEN")) + .get_repo(os.environ.get("GITHUB_REPOSITORY")) + .get_commit(os.environ.get("GITHUB_SHA")) + .get_pulls() + ] + ) +) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cedf425cc9..b197e03261 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,10 +29,12 @@ repos: hooks: - id: check-case-conflict - id: end-of-file-fixer + exclude: '.*\.?sv|.*\.pkl(z)?' - id: mixed-line-ending args: - --fix=lf - id: trailing-whitespace + exclude: '.*\.tsv' - id: check-json - id: pretty-format-json args: diff --git a/.ruff.toml b/.ruff.toml index cb5eb184c3..8a11cb15d7 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -1,10 +1,11 @@ extend-exclude = ["dev/docker_data/get-pip_23.0.1.py"] -extend-select = ["A", "C4", "D", "G", "I", "ICN", "NPY", "PL", "RET", "RSE", "RUF", "Q", "T20", "W"] +extend-select = ["A", "C4", "D", "EM", "F541", "G", "I", "ICN", "NPY", "PL", "RET", "RSE", "RUF", "Q", "T20", "UP032", "W"] +# proposed rules to add next release cycle: ["B904", "LOG007", "TRY002", "TRY201", "TRY400", "TRY401"] # variants still use 3.7 target-version = "py37" -[per-file-ignores] -"CPAC/func_preproc/func_preproc.py" = ["E402"] +[lint] +external = ["T20"] # Don't autoremove `noqa` comments for these rules [lint.flake8-import-conventions.extend-aliases] "CPAC.pipeline.cpac_group_runner" = "cgr" @@ -19,7 +20,7 @@ combine-as-imports = true force-sort-within-sections = true known-first-party = ["CPAC"] no-lines-before = ["collab", "other-first-party", "local-folder"] -order-by-type = true +order-by-type = false section-order = ["future", "standard-library", "third-party", "collab", "other-first-party", "first-party", "local-folder"] [lint.isort.sections] @@ -39,3 +40,8 @@ max-statements = 100 [lint.pyupgrade] # Until variants Python ≥ 3.10 keep-runtime-typing = true + +[per-file-ignores] +"CPAC/func_preproc/func_preproc.py" = ["E402"] +"CPAC/utils/sklearn.py" = ["RUF003"] +"CPAC/utils/utils.py" = ["T201"] # until `repickle` is removed diff --git a/CPAC/__main__.py b/CPAC/__main__.py index d05a977261..75e831e534 100644 --- a/CPAC/__main__.py +++ b/CPAC/__main__.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2018-2022 C-PAC Developers +# Copyright (C) 2018-2024 C-PAC Developers # This file is part of C-PAC. @@ -15,12 +15,20 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see <https://www.gnu.org/licenses/>. 
+from logging import basicConfig, INFO import os +from typing import Union import click from click_aliases import ClickAliasedGroup import pkg_resources as p +from CPAC.utils.docs import version_report +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC") +basicConfig(format="%(message)s", level=INFO) + # CLI tree # # cpac @@ -55,6 +63,13 @@ def main(): @main.command() def version(): """Display environment version information.""" + import CPAC + + logger.info( + "Environment\n===========\n%s\nC-PAC version: %s", + version_report(), + CPAC.__version__, + ) @main.command() @@ -550,7 +565,7 @@ def test(): @test.command(aliases=["run_suite"]) @click.option("--list", "-l", "show_list", is_flag=True) @click.option("--filter", "-f", "pipeline_filter", default="") -def run_suite(show_list=False, pipeline_filter=""): +def run_suite(show_list: Union[bool, str] = False, pipeline_filter=""): from CPAC.pipeline import cpac_runner test_config_dir = p.resource_filename( @@ -579,7 +594,7 @@ def run_suite(show_list=False, pipeline_filter=""): ) if show_list: - pass + show_list = "\nAvailable pipelines:" no_params = False for config_file in os.listdir(test_config_dir): @@ -588,6 +603,7 @@ def run_suite(show_list=False, pipeline_filter=""): continue if show_list: + show_list += f"\n- {config_file[len('pipe-test_'):]}" continue pipe = os.path.join(test_config_dir, config_file) @@ -604,7 +620,7 @@ def run_suite(show_list=False, pipeline_filter=""): cpac_runner.run(data, pipe) if show_list: - pass + logger.info("%s\n", show_list) @test.group(cls=ClickAliasedGroup) diff --git a/CPAC/_entrypoints/__init__.py b/CPAC/_entrypoints/__init__.py new file mode 100644 index 0000000000..93e085e0c7 --- /dev/null +++ b/CPAC/_entrypoints/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>. +"""Entrypoints for C-PAC containers.""" +__all__ = [] diff --git a/dev/docker_data/run-with-freesurfer.sh b/CPAC/_entrypoints/run-with-freesurfer.sh similarity index 89% rename from dev/docker_data/run-with-freesurfer.sh rename to CPAC/_entrypoints/run-with-freesurfer.sh index b1551b4512..ee5c25d97b 100755 --- a/dev/docker_data/run-with-freesurfer.sh +++ b/CPAC/_entrypoints/run-with-freesurfer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2022 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -9,4 +9,4 @@ # You should have received a copy of the GNU Lesser General Public License along with C-PAC. If not, see <https://www.gnu.org/licenses/>. 
source $FREESURFER_HOME/SetUpFreeSurfer.sh -/code/run.py "$@" +/code/CPAC/_entrypoints/run.py "$@" diff --git a/dev/docker_data/run.py b/CPAC/_entrypoints/run.py similarity index 87% rename from dev/docker_data/run.py rename to CPAC/_entrypoints/run.py index a1b21a097b..9126fe67c2 100755 --- a/dev/docker_data/run.py +++ b/CPAC/_entrypoints/run.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2018-2023 C-PAC Developers +# Copyright (C) 2018-2024 C-PAC Developers # This file is part of C-PAC. @@ -15,17 +15,19 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . +"""Run C-PAC in a container.""" import argparse import datetime import os +from pathlib import Path import shutil import subprocess import sys import time +from typing import Optional, Union from warnings import simplefilter import yaml -from nipype import logging from CPAC import __version__, license_notice from CPAC.pipeline import AVAILABLE_PIPELINE_CONFIGS @@ -45,15 +47,15 @@ upgrade_pipeline_to_1_8, ) from CPAC.utils.docs import DOCS_URL_PREFIX -from CPAC.utils.monitoring import failed_to_start, log_nodes_cb +from CPAC.utils.monitoring import failed_to_start, FMLOGGER, log_nodes_cb, WFLOGGER from CPAC.utils.utils import update_nested_dict simplefilter(action="ignore", category=FutureWarning) -logger = logging.getLogger("nipype.workflow") DEFAULT_TMP_DIR = "/tmp" -def run(command, env=None): +def run(command: str, env: Optional[dict] = None) -> None: + """Run a command in the shell.""" if env is None: env = {} process = subprocess.Popen( @@ -66,18 +68,22 @@ def run(command, env=None): break -def parse_yaml(value): +def parse_yaml(value: str) -> dict: + """Parse a string as a YAML dictionary.""" try: config = yaml.safe_load(value) if not isinstance(config, dict): - raise TypeError("config must be a dictionary") + msg = "config must be a dictionary" + raise TypeError(msg) return config except Exception: # pylint: disable=raise-missing-from - raise argparse.ArgumentTypeError(f"Invalid configuration: '{value}'") + msg = f"Invalid configuration: '{value}'" + raise argparse.ArgumentTypeError(msg) -def resolve_aws_credential(source): +def resolve_aws_credential(source: Union[Path, str]) -> str: + """Set AWS credentials from a file or environment variable.""" if source == "env": from urllib.request import urlopen @@ -92,14 +98,14 @@ def resolve_aws_credential(source): ("AccessKeyId", "AWSAcessKeyId"), ("SecretAccessKey", "AWSSecretKey"), ]: - ofd.write("{0}={1}".format(vname, aws_creds[key])) + ofd.write(f"{vname}={aws_creds[key]}") return aws_input_creds if os.path.isfile(source): return source - else: - raise IOError("Could not find aws credentials {0}".format(source)) + msg = f"Could not find aws credentials {source}" + raise IOError(msg) def run_main(): @@ -469,6 +475,8 @@ def run_main(): if not args.group_file or not os.path.exists(args.group_file): import pkg_resources as p + WFLOGGER.warning("\nNo group analysis configuration file was supplied.\n") + args.group_file = p.resource_filename( "CPAC", os.path.join("resources", "configs", "group_config_template.yml"), @@ -483,15 +491,25 @@ def run_main(): if not os.path.exists(output_group): shutil.copyfile(args.group_file, output_group) except (Exception, IOError): - pass + FMLOGGER.warning( + "Could not create group analysis configuration file.\nPlease refer to the C-PAC documentation for group analysis setup." 
+ ) else: - pass + WFLOGGER.warning( + "Please refer to the output directory for a template of the file and, after customizing to your analysis, add the flag\n\n --group_file %s\n\nto your `docker run` command\n", + output_group, + ) sys.exit(1) else: import CPAC.pipeline.cpac_group_runner as cgr + WFLOGGER.info( + "Starting group level analysis of data in %s using %s", + bids_dir, + args.group_file, + ) cgr.run(args.group_file) sys.exit(0) @@ -503,28 +521,28 @@ def run_main(): and not bids_dir_is_s3 and not os.path.exists(bids_dir) ): - sys.exit(1) + msg = f"Error! Could not find {bids_dir}" + raise FileNotFoundError(msg) # check to make sure that the output directory exists if not output_dir_is_s3 and not os.path.exists(output_dir): try: os.makedirs(output_dir) - except Exception: - sys.exit(1) + except Exception as e: + msg = f"Error! Could not find/create output dir {output_dir}" + raise FileNotFoundError(msg) from e # validate input dir (if skip_bids_validator is not set) if not args.data_config_file: if args.bids_validator_config: - run( - "bids-validator --config {config} {bids_dir}".format( - config=args.bids_validator_config, bids_dir=bids_dir - ) - ) + WFLOGGER.info("Running BIDS validator...") + run(f"bids-validator --config {args.bids_validator_config} {bids_dir}") elif args.skip_bids_validator: - pass + WFLOGGER.info("Skipping BIDS validator...") elif bids_dir_is_s3: - pass + WFLOGGER.info("Skipping BIDS validator for S3 datasets...") else: + WFLOGGER.info("Running BIDS validator...") run(f"bids-validator {bids_dir}") if args.preconfig: @@ -538,9 +556,9 @@ def run_main(): c = load_yaml_config(args.pipeline_file, args.aws_input_creds) if "pipeline_setup" not in c: - _url = f"{DOCS_URL_PREFIX}/user/pipelines/" "1.7-1.8-nesting-mappings" + _url = f"{DOCS_URL_PREFIX}/user/pipelines/1.7-1.8-nesting-mappings" - logger.warning( + WFLOGGER.warning( "\nC-PAC changed its pipeline configuration " "format in v1.8.0.\nSee %s for details.\n", _url, @@ -691,7 +709,7 @@ def run_main(): output_dir, "working" ) else: - logger.warning( + FMLOGGER.warning( "Cannot write working directory to S3 bucket. 
" "Either change the output directory to something " "local or turn off the --save_working_dir flag" @@ -713,12 +731,32 @@ def run_main(): ]["calculate_motion_after"] = True if args.participant_label: - pass + WFLOGGER.info( + "#### Running C-PAC for %s", ", ".join(args.participant_label) + ) else: - pass + WFLOGGER.info("#### Running C-PAC") + + WFLOGGER.info( + "Number of participants to run in parallel: %s", + c["pipeline_setup", "system_config", "num_participants_at_once"], + ) if not args.data_config_file: - pass + WFLOGGER.info("Input directory: %s", bids_dir) + + WFLOGGER.info( + "Output directory: %s\nWorking directory: %s\nLog directory: %s\n" + "Remove working directory: %s\nAvailable memory: %s (GB)\n" + "Available threads: %s\nNumber of threads for ANTs: %s", + c["pipeline_setup", "output_directory", "path"], + c["pipeline_setup", "working_directory", "path"], + c["pipeline_setup", "log_directory", "path"], + c["pipeline_setup", "working_directory", "remove_working_dir"], + c["pipeline_setup", "system_config", "maximum_memory_per_participant"], + c["pipeline_setup", "system_config", "max_cores_per_participant"], + c["pipeline_setup", "system_config", "num_ants_threads"], + ) # create a timestamp for writing config files # pylint: disable=invalid-name @@ -767,6 +805,11 @@ def run_main(): args.participant_ndx = os.environ["AWS_BATCH_JOB_ARRAY_INDEX"] if 0 <= participant_ndx < len(sub_list): + WFLOGGER.info( + "Processing data for participant %s (%s)", + args.participant_ndx, + sub_list[participant_ndx]["subject_id"], + ) sub_list = [sub_list[participant_ndx]] data_hash = hash_data_config(sub_list) data_config_file = ( @@ -774,7 +817,8 @@ def run_main(): f"{args.participant_ndx}_{st}.yml" ) else: - sys.exit(1) + msg = f"Participant ndx {participant_ndx} is out of bounds [0, {len(sub_list)})" + raise IndexError(msg) else: data_hash = hash_data_config(sub_list) data_config_file = f"cpac_data_config_{data_hash}_{st}.yml" @@ -823,13 +867,27 @@ def run_main(): monitoring = None if args.monitoring: + from json import JSONDecodeError + try: monitoring = monitor_server( c["pipeline_setup"]["pipeline_name"], c["pipeline_setup"]["log_directory"]["path"], ) - except: - pass + except ( + AttributeError, + FileNotFoundError, + JSONDecodeError, + KeyError, + OSError, + PermissionError, + TypeError, + ValueError, + ) as e: + WFLOGGER.warning( + "The run will continue without monitoring. 
Monitoring was configured to be enabled, but the monitoring server failed to start, so : %s\n", + e, + ) plugin_args = { "n_procs": int( @@ -858,6 +916,7 @@ def run_main(): ], } + WFLOGGER.info("Starting participant level processing") exitcode = CPAC.pipeline.cpac_runner.run( data_config_file, pipeline_config_file, @@ -872,7 +931,7 @@ def run_main(): if args.analysis_level == "test_config": if exitcode == 0: - logger.info( + WFLOGGER.info( "\nPipeline and data configuration files should" " have been written to %s and %s respectively.\n", pipeline_config_file, @@ -883,7 +942,7 @@ def run_main(): from CPAC.utils.monitoring import LOGTAIL for warning in LOGTAIL["warnings"]: - logger.warning("%s\n", warning.rstrip()) + WFLOGGER.warning("%s\n", warning.rstrip()) sys.exit(exitcode) @@ -894,5 +953,10 @@ def run_main(): except Exception as exception: # if we hit an exception before the pipeline starts to build but # we're still able to create a logfile, log the error in the file - failed_to_start(sys.argv[2] if len(sys.argv) > 2 else os.getcwd(), exception) + failed_to_start( + sys.argv[2] + if len(sys.argv) > 2 # noqa: PLR2004 + else os.getcwd(), + exception, + ) raise exception diff --git a/CPAC/anat_preproc/anat_preproc.py b/CPAC/anat_preproc/anat_preproc.py index 11ea616378..b8335e71e8 100644 --- a/CPAC/anat_preproc/anat_preproc.py +++ b/CPAC/anat_preproc/anat_preproc.py @@ -24,13 +24,13 @@ from CPAC.anat_preproc.ants import init_brain_extraction_wf from CPAC.anat_preproc.utils import ( - VolumeRemoveIslands, create_3dskullstrip_arg_string, freesurfer_hemispheres, fsl_aff_to_rigid, fslmaths_command, mri_convert, normalize_wmparc, + VolumeRemoveIslands, wb_command, ) from CPAC.pipeline import nipype_pipeline_engine as pe diff --git a/CPAC/anat_preproc/ants.py b/CPAC/anat_preproc/ants.py index e6599ad7d1..f923ff0bd0 100644 --- a/CPAC/anat_preproc/ants.py +++ b/CPAC/anat_preproc/ants.py @@ -14,7 +14,7 @@ from collections import OrderedDict from logging import getLogger -from packaging.version import Version, parse as parseversion +from packaging.version import parse as parseversion, Version from pkg_resources import resource_filename as pkgr_fn from nipype.interfaces import utility as niu from nipype.interfaces.ants import Atropos, MultiplyImages, N4BiasFieldCorrection diff --git a/CPAC/anat_preproc/tests/test_anat_preproc.py b/CPAC/anat_preproc/tests/test_anat_preproc.py index 517d5dc074..329ba6d655 100755 --- a/CPAC/anat_preproc/tests/test_anat_preproc.py +++ b/CPAC/anat_preproc/tests/test_anat_preproc.py @@ -155,7 +155,7 @@ def test_anat_deoblique(self): # for i in range(0, len(header_values_input)): # # a = (header_values_de[i] == header_values_input[i]) - # if not (a.all() == True): + # if not (a.all()): # # not_match_count += 1 # print 'not_match_count: ', not_match_count diff --git a/CPAC/anat_preproc/utils.py b/CPAC/anat_preproc/utils.py index 239b383f40..407979f5f6 100644 --- a/CPAC/anat_preproc/utils.py +++ b/CPAC/anat_preproc/utils.py @@ -317,7 +317,7 @@ def create_3dskullstrip_arg_string( } if float(shrink_fac) != defaults["shrink_fac"]: - expr += " -shrink_fac {0}".format(shrink_fac) + expr += f" -shrink_fac {shrink_fac}" if not var_shrink_fac: expr += " -no_var_shrink_fac" @@ -329,7 +329,7 @@ def create_3dskullstrip_arg_string( expr += " -monkey" if float(shrink_fac_bot_lim) != defaults["shrink_fac_bot_lim"]: - expr += " -shrink_fac_bot_lim {0}".format(shrink_fac_bot_lim) + expr += f" -shrink_fac_bot_lim {shrink_fac_bot_lim}" if not use_edge: expr += " -no_use_edge" @@ -338,7 +338,7 
@@ def create_3dskullstrip_arg_string( expr += " -no_avoid_vent" if int(niter) != defaults["niter"]: - expr += " -niter {0}".format(niter) + expr += f" -niter {niter}" if not pushout: expr += " -no_pushout" @@ -347,19 +347,19 @@ def create_3dskullstrip_arg_string( expr += " -no_touchup" if int(fill_hole) != defaults["fill_hole"]: - expr += " -fill_hole {0}".format(fill_hole) + expr += f" -fill_hole {fill_hole}" if not avoid_eyes: expr += " -no_avoid_eyes" if float(exp_frac) != defaults["exp_frac"]: - expr += " -exp_frac {0}".format(exp_frac) + expr += f" -exp_frac {exp_frac}" if int(NN_smooth) != defaults["NN_smooth"]: - expr += " -NN_smooth {0}".format(NN_smooth) + expr += f" -NN_smooth {NN_smooth}" if int(smooth_final) != defaults["smooth_final"]: - expr += " -smooth_final {0}".format(smooth_final) + expr += f" -smooth_final {smooth_final}" if push_to_edge: expr += " -push_to_edge" @@ -368,16 +368,16 @@ def create_3dskullstrip_arg_string( expr += " -use_skull" if float(perc_int) != defaults["perc_int"]: - expr += " -perc_int {0}".format(perc_int) + expr += f" -perc_int {perc_int}" if int(max_inter_iter) != defaults["max_inter_iter"]: - expr += " -max_inter_iter {0}".format(max_inter_iter) + expr += f" -max_inter_iter {max_inter_iter}" if float(blur_fwhm) != defaults["blur_fwhm"]: - expr += " -blur_fwhm {0}".format(blur_fwhm) + expr += f" -blur_fwhm {blur_fwhm}" if float(fac) != defaults["fac"]: - expr += " -fac {0}".format(fac) + expr += f" -fac {fac}" return expr @@ -463,12 +463,9 @@ def normalize_wmparc(source_file, target_file, xfm, out_file): """This module provides interfaces for workbench -volume-remove-islands commands""" -from nipype import logging from nipype.interfaces.base import CommandLineInputSpec, File, TraitedSpec from nipype.interfaces.workbench.base import WBCommand -iflogger = logging.getLogger("nipype.interface") - class VolumeRemoveIslandsInputSpec(CommandLineInputSpec): in_file = File( diff --git a/CPAC/aroma/aroma_test.py b/CPAC/aroma/aroma_test.py index d8af84175c..ddf6e89a03 100644 --- a/CPAC/aroma/aroma_test.py +++ b/CPAC/aroma/aroma_test.py @@ -91,7 +91,7 @@ def run_warp_nipype(inputs, output_dir=None, run=True): warp_workflow.connect(t_node, "outputspec.fmap_despiked", dataSink, "fmap_despiked") warp_workflow.connect(t_node, "outputspec.struct", dataSink, "epi2struct") warp_workflow.connect(t_node, "outputspec.anat_func", dataSink, "anat_func") - if run is True: + if run: plugin_args = {"n_procs": num_of_cores} warp_workflow.run(plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args) return None diff --git a/CPAC/connectome/connectivity_matrix.py b/CPAC/connectome/connectivity_matrix.py index 552b9b16e9..2fd8524602 100644 --- a/CPAC/connectome/connectivity_matrix.py +++ b/CPAC/connectome/connectivity_matrix.py @@ -22,14 +22,13 @@ import numpy as np from nilearn.connectome import ConnectivityMeasure -from nipype import logging from nipype.interfaces import utility as util from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.utils.interfaces.function import Function from CPAC.utils.interfaces.netcorr import NetCorr, strip_afni_output_header +from CPAC.utils.monitoring import IFLOGGER -logger = logging.getLogger("nipype.workflow") connectome_methods = { "afni": {"Pearson": "", "Partial": "-part_corr"}, "nilearn": {"Pearson": "correlation", "Partial": "partial correlation"}, @@ -85,8 +84,8 @@ def get_connectome_method(method, tool): cm_method = connectome_methods[tool.lower()].get(method, NotImplemented) if cm_method is NotImplemented: 
warning_message = f"{method} has not yet been implemented for {tool} in C-PAC." - if logger: - logger.warning(NotImplementedError(warning_message)) + if IFLOGGER: + IFLOGGER.warning(NotImplementedError(warning_message)) else: warn(warning_message, category=Warning) return cm_method diff --git a/CPAC/cwas/cwas.py b/CPAC/cwas/cwas.py index d6fe902bce..23c73755ba 100644 --- a/CPAC/cwas/cwas.py +++ b/CPAC/cwas/cwas.py @@ -1,3 +1,20 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""CWAS module for CPAC.""" import os import numpy as np @@ -15,8 +32,9 @@ def joint_mask(subjects, mask_file=None): - """ - Creates a joint mask (intersection) common to all the subjects in a provided list + """Create a joint mask. + + A joint mask is an intersection common to all the subjects in a provided list and a provided mask. Parameters @@ -42,12 +60,14 @@ def joint_mask(subjects, mask_file=None): def calc_mdmrs(D, regressor, cols, permutations): + """Calculate pseudo-F values and significance probabilities.""" cols = np.array(cols, dtype=np.int32) F_set, p_set = mdmr(D, regressor, cols, permutations) return F_set, p_set def calc_subdists(subjects_data, voxel_range): + """Calculate the subdistributions of the subjects data.""" subjects, voxels, _ = subjects_data.shape D = np.zeros((len(voxel_range), subjects, subjects)) for i, v in enumerate(voxel_range): @@ -64,12 +84,14 @@ def calc_subdists(subjects_data, voxel_range): def calc_cwas( subjects_data, regressor, regressor_selected_cols, permutations, voxel_range ): + """Calculate CWAS pseudo-F values and significance probabilities.""" D = calc_subdists(subjects_data, voxel_range) F_set, p_set = calc_mdmrs(D, regressor, regressor_selected_cols, permutations) return F_set, p_set def pval_to_zval(p_set, permu): + """Convert p-values to z-values.""" inv_pval = 1 - p_set zvals = t.ppf(inv_pval, (len(p_set) - 1)) zvals[zvals == -inf] = permu / (permu + 1) @@ -86,8 +108,7 @@ def nifti_cwas( permutations, voxel_range, ): - """ - Performs CWAS for a group of subjects. + """Perform CWAS for a group of subjects. Parameters ---------- @@ -120,7 +141,7 @@ def nifti_cwas( regressor_data = pd.read_table( regressor_file, sep=None, engine="python", dtype={participant_column: str} ) - except: + except (KeyError, OSError, pd.errors.ParserError, ValueError): regressor_data = pd.read_table(regressor_file, sep=None, engine="python") regressor_data = regressor_data.astype({participant_column: str}) @@ -129,10 +150,12 @@ def nifti_cwas( regressor_cols = list(regressor_data.columns) if participant_column not in regressor_cols: - raise ValueError("Participant column was not found in regressor file.") + msg = "Participant column was not found in regressor file." + raise ValueError(msg) if participant_column in columns_string: - raise ValueError("Participant column can not be a regressor.") + msg = "Participant column can not be a regressor." 
+ raise ValueError(msg) subject_ids = list(subjects.keys()) subject_files = list(subjects.values()) @@ -165,10 +188,11 @@ def nifti_cwas( ) if len(regressor.shape) == 1: regressor = regressor[:, np.newaxis] - elif len(regressor.shape) != 2: + elif len(regressor.shape) != 2: # noqa: PLR2004 raise ValueError("Bad regressor shape: %s" % str(regressor.shape)) if len(subject_files) != regressor.shape[0]: - raise ValueError("Number of subjects does not match regressor size") + msg = "Number of subjects does not match regressor size" + raise ValueError(msg) mask = nib.load(mask_file).get_fdata().astype("bool") mask_indices = np.where(mask) subjects_data = np.array( @@ -192,19 +216,22 @@ def nifti_cwas( def create_cwas_batches(mask_file, batches): + """Create batches of voxels to process in parallel.""" mask = nib.load(mask_file).get_fdata().astype("bool") voxels = mask.sum(dtype=int) return np.array_split(np.arange(voxels), batches) def volumize(mask_image, data): + """Create a volume from a mask and data.""" mask_data = mask_image.get_fdata().astype("bool") volume = np.zeros_like(mask_data, dtype=data.dtype) - volume[np.where(mask_data is True)] = data + volume[mask_data] = data return nib.Nifti1Image(volume, header=mask_image.header, affine=mask_image.affine) def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations): + """Merge CWAS batches into a single volume.""" _, _, voxel_range = zip(*cwas_batches) voxels = np.array(np.concatenate(voxel_range)) @@ -245,6 +272,7 @@ def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations): def zstat_image(zvals, mask_file): + """Create a zstat image from zvals and mask_file.""" mask_image = nib.load(mask_file) z_vol = volumize(mask_image, zvals) diff --git a/CPAC/cwas/mdmr.py b/CPAC/cwas/mdmr.py index 3667d490ff..1b4d5cba5c 100644 --- a/CPAC/cwas/mdmr.py +++ b/CPAC/cwas/mdmr.py @@ -81,7 +81,8 @@ def mdmr(D, X, columns, permutations): subjects = X.shape[0] if subjects != D.shape[1]: - raise Exception("# of subjects incompatible between X and D") + msg = "# of subjects incompatible between X and D" + raise Exception(msg) voxels = D.shape[0] Gs = np.zeros((subjects**2, voxels)) diff --git a/CPAC/cwas/tests/X.csv b/CPAC/cwas/tests/X.csv index 325dbaa158..35a03febe8 100644 --- a/CPAC/cwas/tests/X.csv +++ b/CPAC/cwas/tests/X.csv @@ -497,4 +497,4 @@ -6.0016411338403,-0.0306110373950633,0.693594772092793 0.72644264287163,-0.892103149707071,-1.11329689892794 -0.502785117108356,1.06028679375368,-0.638378336988827 --0.769191795416678,0.670541523833665,-0.750911450744791 +-0.769191795416678,0.670541523833665,-0.750911450744791 \ No newline at end of file diff --git a/CPAC/cwas/tests/Y.csv b/CPAC/cwas/tests/Y.csv index 4ad6b4eb7c..e634a55f02 100644 --- a/CPAC/cwas/tests/Y.csv +++ b/CPAC/cwas/tests/Y.csv @@ -497,4 +497,4 @@ -5.49743698379679,1.06476371365073,2.02929182493248,0.36101178725095,2.07158266359674,1.41066184124284,1.58472078057855,1.5680993932034,1.11069236710439,0.671411469197289 -0.673465564112587,-0.549773230876912,-0.37233368032179,-0.41387104831339,0.649072546411808,-0.0687630248171333,-0.552346312329938,-0.016128918042507,0.735792499227785,0.50277713688554 1.66191347310176,-0.121395569513754,-0.364046103819805,-1.06977422896796,-0.763443146363637,-1.58814893043643,0.328715088899028,-1.42100739994848,-1.44436319593838,-0.538179486683385 --0.382985118169075,-0.8048318876615,0.538835565795891,-1.87805520579645,-0.160051584385869,0.988250333453972,-1.04721019435067,-0.760466689366639,-1.17954042961936,-1.69802919614485 
+-0.382985118169075,-0.8048318876615,0.538835565795891,-1.87805520579645,-0.160051584385869,0.988250333453972,-1.04721019435067,-0.760466689366639,-1.17954042961936,-1.69802919614485 \ No newline at end of file diff --git a/CPAC/cwas/tests/features/steps/base_cwas.py b/CPAC/cwas/tests/features/steps/base_cwas.py index ef5c93a3bf..1cf179e684 100755 --- a/CPAC/cwas/tests/features/steps/base_cwas.py +++ b/CPAC/cwas/tests/features/steps/base_cwas.py @@ -10,7 +10,8 @@ def custom_corrcoef(X, Y=None): Y = X if X.shape[0] != Y.shape[0]: - raise Exception("X and Y must have the same number of rows.") + msg = "X and Y must have the same number of rows." + raise Exception(msg) X = X.astype(float) Y = Y.astype(float) diff --git a/CPAC/cwas/tests/test_cwas.py b/CPAC/cwas/tests/test_cwas.py index 2e094bd8b1..f3b4490d47 100755 --- a/CPAC/cwas/tests/test_cwas.py +++ b/CPAC/cwas/tests/test_cwas.py @@ -1,23 +1,50 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Test the CWAS pipeline.""" +from logging import basicConfig, INFO + import pytest +import nibabel as nib from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC.cwas.tests") +basicConfig(format="%(message)s", level=INFO) @pytest.mark.skip(reason="requires RegressionTester") def test_adhd04(): + """Test CWAS with ADHD04 data.""" rt = RegressionTester("adhd04", "diagnosis", "diagnosis") rt.run() @pytest.mark.skip(reason="requires RegressionTester") def test_adhd40(): + """Test CWAS with ADHD40 data.""" rt = RegressionTester("adhd40", "diagnosis", "diagnosis + age + sex + meanFD") rt.run() @pytest.mark.skip(reason="requires specific local directory") class RegressionTester(object): - """ + """Test the CWAS pipeline in Python and R, and compare the results. + tmp = RegressionTester('adhd04', 'diagnosis', 'diagnosis') tmp.run(). 
""" @@ -27,7 +54,7 @@ def __init__( name, factor, formula, - base="/home2/data/Projects/CPAC_Regression_Test/" "2013-05-30_cwas", + base="/home2/data/Projects/CPAC_Regression_Test/2013-05-30_cwas", ): super(RegressionTester, self).__init__() self.base = base @@ -36,13 +63,18 @@ def __init__( self.formula = formula def run(self): + """Run the CWAS pipeline in Python and R, and compare the results.""" + logger.info("Python-Based CWAS") self.run_cwas() + logger.info("R-Based CWAS") self.run_connectir() + logger.info("Compare Python and R") self.compare_py_vs_r() def run_cwas(self): + """Run the CWAS pipeline in Python.""" import os os.chdir("%s/C-PAC" % self.base) @@ -80,10 +112,8 @@ def run_cwas(self): ### # Read in list of subject functionals - subjects_list = [ - l.strip().strip('"') - for l in open(sfile).readlines() # pylint: disable=consider-using-with - ] + with open(sfile) as _f: + subjects_list = [l.strip().strip('"') for l in _f.readlines()] # noqa: E741 # Read in design/regressor file regressors = np.loadtxt(rfile) @@ -111,21 +141,23 @@ def run_cwas(self): # pass # Run it! - time.clock() + start = time.clock() plugin_args = {"n_procs": 4} c.run(plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args) - time.clock() + end = time.clock() + logger.info("time: %.2gs", end - start) def run_connectir(self): - """ - This runs distances and MDMR with connectir. + """Distances and MDMR with connectir. + This should be run after run_cwas(). """ import os import time - time.clock() + start = time.clock() + logger.info("Subject Distances") cmd = ( "connectir_subdist.R --infuncs1 %(basedir)s/configs/" "%(outbase)s_funcpaths_4mm.txt --brainmask1 %(basedir)s/" @@ -134,8 +166,10 @@ def run_connectir(self): "--forks 1 --threads 12 %(basedir)s/results_%(outbase)s.r" % {"basedir": self.base, "outbase": self.name} ) + logger.info("RUNNING: %s", cmd) os.system(cmd) + logger.info("MDMR") cmd = ( "connectir_mdmr.R --indir %(basedir)s/results_%(outbase)s.r " "--formula '%(formula)s' --model " @@ -150,13 +184,15 @@ def run_connectir(self): "nperms": 1000, } ) + logger.info("RUNNING: %s", cmd) os.system(cmd) - time.clock() + end = time.clock() + logger.info("time: %.2gs", end - start) @pytest.mark.skip(reason="No R installation in C-PAC image") def compare_py_vs_r(self): - """This will compare the output from the CPAC python vs the R connectir.""" + """Compare the output from the CPAC python vs the R connectir.""" import os os.chdir("%s/C-PAC" % self.base) @@ -239,18 +275,20 @@ def compare_py_vs_r(self): comp = np.allclose(py_hat, r_hat) assert_that(comp, "regressors as hat matrices") - comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99 + comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004 assert_that(comp, "Fstats similarity") - comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99 + comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004 assert_that(comp, "p-values similarity ") - comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01 + comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01 # noqa: PLR2004 assert_that(comp, "Fstats difference") - comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05 + comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05 # noqa: PLR2004 assert_that(comp, "p-values difference") + logger.info("tests were all good") + def test_cwas_connectir(): # add the code to run the same cwas with connectir @@ -302,15 +340,18 @@ def test_distances(): mask_file = op.join(sdir, "mask.nii.gz") sfile = 
"/home2/data/Projects/CWAS/share/nki/subinfo/40_Set1_N104/short_compcor_funcpaths_4mm_smoothed.txt" - subjects_list = [l.strip().strip('"') for l in open(sfile).readlines()] + subjects_list = [ + l.strip().strip('"') + for l in open(sfile).readlines() # noqa: E741 + ] subjects_list = subjects_list[:n] subjects_file_list = subjects_list - mask = nb.load(mask_file).get_fdata().astype("bool") + mask = nib.load(mask_file).get_fdata().astype("bool") mask_indices = np.where(mask) subjects_data = [ - nb.load(subject_file).get_fdata().astype("float64")[mask_indices].T + nib.load(subject_file).get_fdata().astype("float64")[mask_indices].T for subject_file in subjects_file_list ] subjects_data = np.array(subjects_data) diff --git a/CPAC/cwas/tests/test_pipeline_cwas.py b/CPAC/cwas/tests/test_pipeline_cwas.py index 5db2e377e2..3e1f05ad5d 100644 --- a/CPAC/cwas/tests/test_pipeline_cwas.py +++ b/CPAC/cwas/tests/test_pipeline_cwas.py @@ -1,3 +1,21 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Test the CWAS pipeline.""" +from logging import basicConfig, INFO import os from urllib.error import URLError @@ -8,18 +26,24 @@ import nilearn.datasets from CPAC.cwas.pipeline import create_cwas +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC.cwas.tests") +basicConfig(format="%(message)s", level=INFO) @pytest.mark.parametrize("z_score", [[0], [1], [0, 1], []]) def test_pipeline(z_score): + """Test the CWAS pipeline with z-score forking options.""" try: # pylint: disable=invalid-name cc = nilearn.datasets.fetch_atlas_craddock_2012() except URLError: + logger.info("Could not fetch atlas, skipping test") return try: os.mkdir("/tmp/cwas") - except: + except: # noqa: E722 pass abide_data = nilearn.datasets.fetch_abide_pcp(n_subjects=10) @@ -34,7 +58,7 @@ def test_pipeline(z_score): assert all(FID in images[i] for i, FID in enumerate(pheno.FILE_ID)) img = nib.load(cc["scorr_mean"]) img_data = np.copy(img.get_fdata()[:, :, :, 10]) - img_data[img_data != 2] = 0.0 + img_data[img_data != 2] = 0.0 # noqa: PLR2004 img = nib.Nifti1Image(img_data, img.affine) nib.save(img, "/tmp/cwas/roi.nii.gz") diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index 516fc3a9c5..4196793f08 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -39,7 +39,7 @@ def create_afni_arg(shrink_fac): - return "-shrink_fac {0} ".format(shrink_fac) + return f"-shrink_fac {shrink_fac} " @nodeblock( @@ -282,12 +282,12 @@ def same_pe_direction_prep(same_pe_epi, func_mean): qwarp_input = func_mean elif same_pe_epi: skullstrip_outfile = os.path.join( - os.getcwd(), "{0}_mask.nii.gz".format(os.path.basename(same_pe_epi)) + os.getcwd(), f"{os.path.basename(same_pe_epi)}_mask.nii.gz" ) skullstrip_cmd = [ "3dAutomask", "-apply_prefix", - 
"{0}_masked.nii.gz".format(os.path.basename(same_pe_epi)), + f"{os.path.basename(same_pe_epi)}_masked.nii.gz", "-prefix", skullstrip_outfile, same_pe_epi, @@ -295,7 +295,7 @@ def same_pe_direction_prep(same_pe_epi, func_mean): subprocess.check_output(skullstrip_cmd) extract_brain_outfile = os.path.join( - os.getcwd(), "{0}_calc.nii.gz".format(os.path.basename(same_pe_epi)) + os.getcwd(), f"{os.path.basename(same_pe_epi)}_calc.nii.gz" ) extract_brain_cmd = [ "3dcalc", @@ -311,7 +311,7 @@ def same_pe_direction_prep(same_pe_epi, func_mean): subprocess.check_output(extract_brain_cmd) align_epi_outfile = os.path.join( - os.getcwd(), "{0}_calc_flirt.nii.gz".format(os.path.basename(same_pe_epi)) + os.getcwd(), f"{os.path.basename(same_pe_epi)}_calc_flirt.nii.gz" ) align_epi_cmd = [ "flirt", @@ -497,7 +497,7 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(opp_pe_to_func, "out_file", calc_blip_warp, "opp_pe") wf.connect(prep_qwarp_input, "qwarp_input", calc_blip_warp, "same_pe") - convert_afni_warp_imports = ["import os", "import nibabel as nb"] + convert_afni_warp_imports = ["import os", "import nibabel as nib"] convert_afni_warp = pe.Node( function.Function( input_names=["afni_warp"], diff --git a/CPAC/distortion_correction/tests/test_distortion_correction.py b/CPAC/distortion_correction/tests/test_distortion_correction.py index 7348757ad7..a1170effe7 100644 --- a/CPAC/distortion_correction/tests/test_distortion_correction.py +++ b/CPAC/distortion_correction/tests/test_distortion_correction.py @@ -91,7 +91,7 @@ def run_warp_nipype(inputs, output_dir=None, run=True): warp_workflow.connect(t_node, "outputspec.fmap_despiked", dataSink, "fmap_despiked") warp_workflow.connect(t_node, "outputspec.struct", dataSink, "epi2struct") warp_workflow.connect(t_node, "outputspec.anat_func", dataSink, "anat_func") - if run is True: + if run: plugin_args = {"n_procs": num_of_cores} warp_workflow.run(plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args) return None diff --git a/CPAC/distortion_correction/utils.py b/CPAC/distortion_correction/utils.py index 0457933276..2b78dbfa4d 100644 --- a/CPAC/distortion_correction/utils.py +++ b/CPAC/distortion_correction/utils.py @@ -176,11 +176,12 @@ def phase_encode( """ meta_data = [dwell_time_one, dwell_time_two, ro_time_one, ro_time_two] if not any(meta_data): - raise Exception( + msg = ( "\n[!] Blip-FSL-TOPUP workflow: neither " "TotalReadoutTime nor DwellTime is present in the " "epi field map meta-data." ) + raise Exception(msg) # create text file acq_params = os.path.join(os.getcwd(), "acqparams.txt") @@ -197,10 +198,11 @@ def phase_encode( if ro_time_one and ro_time_two: ro_times = [f"-1 0 0 {ro_time_one}", f"1 0 0 {ro_time_two}"] else: - raise Exception( + msg = ( "[!] No dwell time or total readout time " "present for the acq-fMRI EPI field maps." ) + raise Exception(msg) elif unwarp_dir in ["y", "y-", "-y", "j", "-j", "j-"]: if dwell_time_one and dwell_time_two: dim = nib.load(phase_one).shape[1] @@ -210,12 +212,14 @@ def phase_encode( if ro_time_one and ro_time_two: ro_times = [f"0 -1 0 {ro_time_one}", f"0 1 0 {ro_time_two}"] else: - raise Exception( + msg = ( "[!] No dwell time or total readout time " "present for the acq-fMRI EPI field maps." ) + raise Exception(msg) else: - raise Exception(f"unwarp_dir={unwarp_dir} is unsupported.") + msg = f"unwarp_dir={unwarp_dir} is unsupported." 
+ raise Exception(msg) # get number of volumes dims = [ diff --git a/CPAC/easy_thresh/easy_thresh.py b/CPAC/easy_thresh/easy_thresh.py index 0cf8799625..61c9f8f047 100644 --- a/CPAC/easy_thresh/easy_thresh.py +++ b/CPAC/easy_thresh/easy_thresh.py @@ -444,7 +444,8 @@ def copy_geom(infile_a, infile_b): subprocess.check_output(cmd) return out_file except Exception: - raise Exception("Error while using fslcpgeom to copy geometry") + msg = "Error while using fslcpgeom to copy geometry" + raise Exception(msg) def get_standard_background_img(in_file, file_parameters): @@ -476,12 +477,13 @@ def get_standard_background_img(in_file, file_parameters): group_mm = int(hdr.get_zooms()[2]) FSLDIR, MNI = file_parameters standard_path = os.path.join( - FSLDIR, "data/standard/", "{0}_T1_{1}mm_brain.nii.gz".format(MNI, group_mm) + FSLDIR, "data/standard/", f"{MNI}_T1_{group_mm}mm_brain.nii.gz" ) return os.path.abspath(standard_path) except Exception: - raise Exception("Error while loading background image") + msg = "Error while loading background image" + raise Exception(msg) def get_tuple(infile_a, infile_b): diff --git a/CPAC/func_preproc/func_ingress.py b/CPAC/func_preproc/func_ingress.py index e335820fb0..60c8ccf5c9 100644 --- a/CPAC/func_preproc/func_ingress.py +++ b/CPAC/func_preproc/func_ingress.py @@ -14,12 +14,8 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -from nipype import logging - from CPAC.utils.datasource import create_func_datasource, ingress_func_metadata -logger = logging.getLogger("nipype.workflow") - def connect_func_ingress( workflow, strat_list, c, sub_dict, subject_id, input_creds_path, unique_id=None diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py index 03ed809b2b..a01f64b226 100644 --- a/CPAC/func_preproc/func_motion.py +++ b/CPAC/func_preproc/func_motion.py @@ -306,7 +306,7 @@ def get_mcflirt_rms_abs(rms_files): ) def get_motion_ref(wf, cfg, strat_pool, pipe_num, opt=None): if opt not in get_motion_ref.option_val: - raise ValueError( + msg = ( "\n\n[!] Error: The 'motion_correction_reference' " "parameter of the 'motion_correction' workflow " "must be one of:\n\t{0}.\n\nTool input: '{1}'" @@ -314,6 +314,7 @@ def get_motion_ref(wf, cfg, strat_pool, pipe_num, opt=None): " or ".join([f"'{val}'" for val in get_motion_ref.option_val]), opt ) ) + raise ValueError(msg) if opt == "mean": func_get_RPI = pe.Node( @@ -377,7 +378,7 @@ def get_motion_ref(wf, cfg, strat_pool, pipe_num, opt=None): def motion_correct_3dvolreg(wf, cfg, strat_pool, pipe_num): """Calculate motion parameters with 3dvolreg.""" if int(cfg.pipeline_setup["system_config"]["max_cores_per_participant"]) > 1: - chunk_imports = ["import nibabel as nb"] + chunk_imports = ["import nibabel as nib"] chunk = pe.Node( Function( input_names=["func_file", "n_chunks", "chunk_size"], @@ -667,12 +668,13 @@ def motion_correct_connections(wf, cfg, strat_pool, pipe_num, opt): """Check opt for valid option, then connect that option.""" motion_correct_options = valid_options["motion_correction"] if opt not in motion_correct_options: - raise KeyError( + msg = ( "\n\n[!] 
Error: The 'tool' parameter of the " "'motion_correction' workflow must be one of " f"{str(motion_correct_options).strip('[{()}]')}" f".\n\nTool input: {opt}\n\n" ) + raise KeyError(msg) return motion_correct[opt](wf, cfg, strat_pool, pipe_num) @@ -732,9 +734,9 @@ def motion_estimate_filter(wf, cfg, strat_pool, pipe_num, opt=None): notch_imports = [ "import os", "import numpy as np", - "from scipy.signal import iirnotch, filtfilt, firwin, " "freqz", + "from scipy.signal import iirnotch, filtfilt, firwin, freqz", "from matplotlib import pyplot as plt", - "from CPAC.func_preproc.utils import degrees_to_mm, " "mm_to_degrees", + "from CPAC.func_preproc.utils import degrees_to_mm, mm_to_degrees", ] notch = pe.Node( Function( diff --git a/CPAC/func_preproc/func_preproc.py b/CPAC/func_preproc/func_preproc.py index 9e0509d84e..820ff2e14f 100644 --- a/CPAC/func_preproc/func_preproc.py +++ b/CPAC/func_preproc/func_preproc.py @@ -16,10 +16,7 @@ # License along with C-PAC. If not, see . """Functional preprocessing.""" # pylint: disable=ungrouped-imports,wrong-import-order,wrong-import-position -from nipype import logging from nipype.interfaces import afni, ants, fsl, utility as util - -logger = logging.getLogger("nipype.workflow") from nipype.interfaces.afni import preprocess, utils as afni_utils from CPAC.func_preproc.utils import nullify @@ -34,6 +31,7 @@ def collect_arguments(*args): + """Collect arguments.""" command_args = [] if args[0]: command_args += [args[1]] @@ -42,6 +40,7 @@ def collect_arguments(*args): def anat_refined_mask(init_bold_mask=True, wf_name="init_bold_mask"): + """Generate an anatomically refined mask.""" wf = pe.Workflow(name=wf_name) input_node = pe.Node( @@ -76,7 +75,7 @@ def anat_refined_mask(init_bold_mask=True, wf_name="init_bold_mask"): wf.connect(func_single_volume, "out_file", func_tmp_brain, "in_file_a") # 2.1 get a tmp func brain mask - if init_bold_mask is True: + if init_bold_mask: # 2.1.1 N4BiasFieldCorrection single volume of raw_func func_single_volume_n4_corrected = pe.Node( interface=ants.N4BiasFieldCorrection( @@ -164,7 +163,7 @@ def anat_refined_mask(init_bold_mask=True, wf_name="init_bold_mask"): wf.connect(reg_anat_mask_to_func, "out_file", func_mask, "operand_files") - if init_bold_mask is True: + if init_bold_mask: wf.connect(func_tmp_brain_mask_dil, "out_file", func_mask, "in_file") else: wf.connect(input_node, "init_func_brain_mask", func_mask, "in_file") @@ -175,7 +174,10 @@ def anat_refined_mask(init_bold_mask=True, wf_name="init_bold_mask"): def anat_based_mask(wf_name="bold_mask"): - """Reference `DCAN lab BOLD mask `_.""" + """Generate a functional mask from anatomical data. + + Reference `DCAN lab BOLD mask `_. + """ wf = pe.Workflow(name=wf_name) input_node = pe.Node( @@ -378,6 +380,7 @@ def create_wf_edit_func(wf_name="edit_func"): def slice_timing_wf(name="slice_timing", tpattern=None, tzero=None): + """Calculate corrected slice-timing.""" # allocate a workflow object wf = pe.Workflow(name=name) @@ -442,11 +445,10 @@ def slice_timing_wf(name="slice_timing", tpattern=None, tzero=None): def get_idx(in_files, stop_idx=None, start_idx=None): - """ - Method to get the first and the last slice for - the functional run. It verifies the user specified - first and last slice. If the values are not valid, it - calculates and returns the very first and the last slice. + """Get the first and the last slice for the functional run. + + Verify the user specified first and last slice. 
If the values are not valid, + calculate and return the very first and the last slice. Parameters ---------- @@ -479,7 +481,7 @@ def get_idx(in_files, stop_idx=None, start_idx=None): shape = hdr.get_data_shape() # Check to make sure the input file is 4-dimensional - if len(shape) != 4: + if len(shape) != 4: # noqa: PLR2004 raise TypeError("Input nifti file: %s is not a 4D file" % in_files) # Grab the number of volumes nvols = int(hdr.get_data_shape()[3]) @@ -505,6 +507,7 @@ def get_idx(in_files, stop_idx=None, start_idx=None): outputs=["desc-preproc_bold", "desc-reorient_bold"], ) def func_reorient(wf, cfg, strat_pool, pipe_num, opt=None): + """Reorient functional timeseries.""" func_deoblique = pe.Node( interface=afni_utils.Refit(), name=f"func_deoblique_{pipe_num}", @@ -544,6 +547,7 @@ def func_reorient(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["desc-preproc_bold"], ) def func_scaling(wf, cfg, strat_pool, pipe_num, opt=None): + """Scale functional timeseries.""" scale_func_wf = create_scale_func_wf( scaling_factor=cfg.scaling_factor, wf_name=f"scale_func_{pipe_num}" ) @@ -567,6 +571,7 @@ def func_scaling(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def func_truncate(wf, cfg, strat_pool, pipe_num, opt=None): + """Truncate functional timeseries.""" # if cfg.functional_preproc['truncation']['start_tr'] == 0 and \ # cfg.functional_preproc['truncation']['stop_tr'] == None: # data, key = strat_pool.get_data("desc-preproc_bold", @@ -602,6 +607,7 @@ def func_truncate(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def func_despike(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate de-spiked functional timeseries in native space with AFNI.""" despike = pe.Node( interface=preprocess.Despike(), name=f"func_despiked_{pipe_num}", @@ -644,6 +650,7 @@ def func_despike(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def func_despike_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate de-spiked functional timeseries in template space with AFNI.""" despike = pe.Node( interface=preprocess.Despike(), name=f"func_despiked_template_{pipe_num}", @@ -698,6 +705,7 @@ def func_despike_template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def func_slice_time(wf, cfg, strat_pool, pipe_num, opt=None): + """Genetare slice-time correctied timeseries.""" slice_time = slice_timing_wf( name="func_slice_timing_correction_" f"{pipe_num}", tpattern=cfg.functional_preproc["slice_timing_correction"]["tpattern"], @@ -737,6 +745,7 @@ def func_slice_time(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def bold_mask_afni(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate a functional mask with AFNI.""" func_get_brain_mask = pe.Node( interface=preprocess.Automask(), name=f"func_get_brain_mask_AFNI_{pipe_num}" ) @@ -762,6 +771,7 @@ def bold_mask_afni(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["space-bold_desc-brain_mask"], ) def bold_mask_fsl(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate functional mask with FSL.""" inputnode_bet = pe.Node( util.IdentityInterface( fields=[ @@ -940,7 +950,8 @@ def form_thr_string(thr): outputs=["space-bold_desc-brain_mask", "desc-ref_bold"], ) def bold_mask_fsl_afni(wf, cfg, strat_pool, pipe_num, opt=None): - """fMRIPrep-style BOLD mask + """fMRIPrep-style BOLD mask. + `Ref `_. 
""" # Initialize transforms with antsAI @@ -1139,6 +1150,7 @@ def bold_mask_fsl_afni(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["space-bold_desc-brain_mask"], ) def bold_mask_anatomical_refined(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate the BOLD mask by basing it off of the refined anatomical brain mask.""" # binarize anat mask, in case it is not a binary mask. anat_brain_mask_bin = pe.Node( interface=fsl.ImageMaths(), name=f"anat_brain_mask_bin_{pipe_num}" @@ -1283,6 +1295,7 @@ def bold_mask_anatomical_refined(wf, cfg, strat_pool, pipe_num, opt=None): ) def bold_mask_anatomical_based(wf, cfg, strat_pool, pipe_num, opt=None): """Generate the BOLD mask by basing it off of the anatomical brain mask. + Adapted from `DCAN Lab's BOLD mask method from the ABCD pipeline `_. """ # 0. Take single volume of func @@ -1375,7 +1388,8 @@ def bold_mask_anatomical_based(wf, cfg, strat_pool, pipe_num, opt=None): ], ) def bold_mask_anatomical_resampled(wf, cfg, strat_pool, pipe_num, opt=None): - """Resample anatomical brain mask in standard space to get BOLD brain mask in standard space + """Resample anatomical brain mask to get BOLD brain mask in standard space. + Adapted from `DCAN Lab's BOLD mask method from the ABCD pipeline `_. """ # applywarp --rel --interp=spline -i ${T1wImage} -r ${ResampRefIm} --premat=$FSLDIR/etc/flirtsch/ident.mat -o ${WD}/${T1wImageFile}.${FinalfMRIResolution} @@ -1456,6 +1470,7 @@ def bold_mask_anatomical_resampled(wf, cfg, strat_pool, pipe_num, opt=None): ) def bold_mask_ccs(wf, cfg, strat_pool, pipe_num, opt=None): """Generate the BOLD mask by basing it off of the anatomical brain. + Adapted from `the BOLD mask method from the CCS pipeline `_. """ # Run 3dAutomask to generate func initial mask @@ -1592,6 +1607,7 @@ def bold_mask_ccs(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def bold_masking(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate a functional brain mask.""" func_edge_detect = pe.Node( interface=afni_utils.Calc(), name=f"func_extract_brain_{pipe_num}" ) @@ -1623,6 +1639,7 @@ def bold_masking(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["desc-mean_bold"], ) def func_mean(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate a mean functional image.""" func_mean = pe.Node(interface=afni_utils.TStat(), name=f"func_mean_{pipe_num}") func_mean.inputs.options = "-mean" @@ -1646,6 +1663,7 @@ def func_mean(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["desc-preproc_bold"], ) def func_normalize(wf, cfg, strat_pool, pipe_num, opt=None): + """Normalize a functional image.""" func_normalize = pe.Node( interface=fsl.ImageMaths(), name=f"func_normalize_{pipe_num}", @@ -1671,6 +1689,7 @@ def func_normalize(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["space-bold_desc-brain_mask"], ) def func_mask_normalize(wf, cfg, strat_pool, pipe_num, opt=None): + """Normalize a functional mask.""" func_mask_normalize = pe.Node( interface=fsl.ImageMaths(), name=f"func_mask_normalize_{pipe_num}", diff --git a/CPAC/func_preproc/utils.py b/CPAC/func_preproc/utils.py index 34951d5458..2fb21fedb1 100644 --- a/CPAC/func_preproc/utils.py +++ b/CPAC/func_preproc/utils.py @@ -27,10 +27,11 @@ def chunk_ts(func_file, n_chunks=None, chunk_size=None): elif chunk_size: n_chunks = int(trs / chunk_size) else: - raise Exception( + msg = ( "\n[!] 
Dev error: Either 'n_chunks' or 'chunk_size' " "arguments must be passed to 'chunk_ts' function.\n" ) + raise Exception(msg) for chunk_idx in range(0, n_chunks): if chunk_idx == n_chunks - 1: @@ -52,9 +53,9 @@ def split_ts_chunks(func_file, tr_ranges): for chunk_idx, tr_range in enumerate(tr_ranges): out_file = os.path.join( os.getcwd(), - os.path.basename(func_file).replace(ext, "_{0}{1}".format(chunk_idx, ext)), + os.path.basename(func_file).replace(ext, f"_{chunk_idx}{ext}"), ) - in_file = "{0}[{1}..{2}]".format(func_file, tr_range[0], tr_range[1]) + in_file = f"{func_file}[{tr_range[0]}..{tr_range[1]}]" cmd = ["3dcalc", "-a", in_file, "-expr", "a", "-prefix", out_file] @@ -230,7 +231,7 @@ def notch_filter_motion( filtered_params[0:3, :] = mm_to_degrees(filtered_params[0:3, :], head_radius=50) filtered_motion_params = os.path.join( - os.getcwd(), "{0}_filtered.1D".format(os.path.basename(motion_params)) + os.getcwd(), f"{os.path.basename(motion_params)}_filtered.1D" ) np.savetxt(filtered_motion_params, filtered_params.T, fmt="%f") diff --git a/CPAC/generate_motion_statistics/__init__.py b/CPAC/generate_motion_statistics/__init__.py index 23c8821955..08e45781ca 100644 --- a/CPAC/generate_motion_statistics/__init__.py +++ b/CPAC/generate_motion_statistics/__init__.py @@ -16,12 +16,12 @@ # License along with C-PAC. If not, see . """Functions for generating motion statistics.""" from .generate_motion_statistics import ( - ImageTo1D, calculate_DVARS, calculate_FD_J, calculate_FD_P, gen_motion_parameters, gen_power_parameters, + ImageTo1D, motion_power_statistics, ) from .utils import affine_file_from_params_file, affine_from_params diff --git a/CPAC/generate_motion_statistics/generate_motion_statistics.py b/CPAC/generate_motion_statistics/generate_motion_statistics.py index f2990ed39d..efa1994969 100644 --- a/CPAC/generate_motion_statistics/generate_motion_statistics.py +++ b/CPAC/generate_motion_statistics/generate_motion_statistics.py @@ -488,7 +488,8 @@ def calculate_FD_J( fd = np.append(0, rel_rms) else: - raise ValueError(f"calc_from {calc_from} not supported") + msg = f"calc_from {calc_from} not supported" + raise ValueError(msg) out_file = os.path.join(os.getcwd(), "FD_J.1D") np.savetxt(out_file, fd, fmt="%.8f") @@ -621,7 +622,7 @@ def avg_abs(v): with open(out_file, "w") as f: f.write(",".join(t for t, v in info)) f.write("\n") - f.write(",".join(v if type(v) == str else "{0:.6f}".format(v) for t, v in info)) + f.write(",".join(v if type(v) == str else f"{v:.6f}" for t, v in info)) f.write("\n") return out_file, info, maxdisp, relsdisp @@ -698,7 +699,7 @@ def gen_power_parameters( with open(out_file, "a") as f: f.write(",".join(t for t, v in info)) f.write("\n") - f.write(",".join(v if type(v) == str else "{0:.4f}".format(v) for t, v in info)) + f.write(",".join(v if type(v) == str else f"{v:.4f}" for t, v in info)) f.write("\n") return out_file, info diff --git a/CPAC/generate_motion_statistics/test/test_dvars.py b/CPAC/generate_motion_statistics/test/test_dvars.py index 9c26ca20e9..b044177088 100644 --- a/CPAC/generate_motion_statistics/test/test_dvars.py +++ b/CPAC/generate_motion_statistics/test/test_dvars.py @@ -3,7 +3,7 @@ import numpy as np import nibabel as nib -from CPAC.generate_motion_statistics import ImageTo1D, calculate_DVARS +from CPAC.generate_motion_statistics import calculate_DVARS, ImageTo1D np.random.seed(10) diff --git a/CPAC/group_analysis/group_analysis.py b/CPAC/group_analysis/group_analysis.py index 867e4ec4bc..2522271ff3 100644 --- 
a/CPAC/group_analysis/group_analysis.py +++ b/CPAC/group_analysis/group_analysis.py @@ -34,7 +34,8 @@ def get_operation(in_file): n_vol = int(hdr.get_data_shape()[3]) return "-abs -bin -Tmean -mul %d" % (n_vol) except: - raise IOError("Unable to load the input nifti image") + msg = "Unable to load the input nifti image" + raise IOError(msg) def label_zstat_files(zstat_list, con_file): @@ -55,7 +56,7 @@ def label_zstat_files(zstat_list, con_file): for zstat_file, con_name in zip(zstat_list, cons): # filename = os.path.basename(zstat_file) - new_name = "zstat_{0}".format(con_name) + new_name = f"zstat_{con_name}" # new_zstat_list.append(zstat_file.replace(filename, new_name)) new_zstat_list.append(new_name) diff --git a/CPAC/image_utils/spatial_smoothing.py b/CPAC/image_utils/spatial_smoothing.py index facebaf923..a09178bfba 100644 --- a/CPAC/image_utils/spatial_smoothing.py +++ b/CPAC/image_utils/spatial_smoothing.py @@ -52,11 +52,11 @@ def spatial_smoothing(wf_name, fwhm, input_image_type="func_derivative", opt=Non image_types = ["func_derivative", "func_derivative_multi", "func_4d", "func_mask"] if input_image_type not in image_types: - raise ValueError( - "Input image type {0} should be one of " "{1}".format( - input_image_type, ", ".join(image_types) - ) + msg = ( + f"Input image type {input_image_type} should be one of" + f" {', '.join(image_types)}" ) + raise ValueError(msg) if opt == "FSL": output_smooth_mem_gb = 4.0 diff --git a/CPAC/image_utils/statistical_transforms.py b/CPAC/image_utils/statistical_transforms.py index 83a01036d9..30819f7bf3 100644 --- a/CPAC/image_utils/statistical_transforms.py +++ b/CPAC/image_utils/statistical_transforms.py @@ -82,7 +82,7 @@ def calc_avg(workflow, output_name, strat, num_strat, map_node=False): if map_node: calc_average = pe.MapNode( interface=preprocess.Maskave(), - name="{0}_mean_{1}".format(output_name, num_strat), + name=f"{output_name}_mean_{num_strat}", iterfield=["in_file"], ) @@ -93,13 +93,13 @@ def calc_avg(workflow, output_name, strat, num_strat, map_node=False): function=extract_output_mean, as_module=True, ), - name="{0}_mean_to_txt_{1}".format(output_name, num_strat), + name=f"{output_name}_mean_to_txt_{num_strat}", iterfield=["in_file"], ) else: calc_average = pe.Node( interface=preprocess.Maskave(), - name="{0}_mean_{1}".format(output_name, num_strat), + name=f"{output_name}_mean_{num_strat}", ) mean_to_csv = pe.Node( @@ -109,7 +109,7 @@ def calc_avg(workflow, output_name, strat, num_strat, map_node=False): function=extract_output_mean, as_module=True, ), - name="{0}_mean_to_txt_{1}".format(output_name, num_strat), + name=f"{output_name}_mean_to_txt_{num_strat}", ) mean_to_csv.inputs.output_name = output_name @@ -120,7 +120,7 @@ def calc_avg(workflow, output_name, strat, num_strat, map_node=False): strat.append_name(calc_average.name) strat.update_resource_pool( - {"output_means.@{0}_average".format(output_name): (mean_to_csv, "output_mean")} + {f"output_means.@{output_name}_average": (mean_to_csv, "output_mean")} ) return strat diff --git a/CPAC/image_utils/tests/test_smooth.py b/CPAC/image_utils/tests/test_smooth.py index 538f9cc8aa..d1f8a8ec98 100644 --- a/CPAC/image_utils/tests/test_smooth.py +++ b/CPAC/image_utils/tests/test_smooth.py @@ -1,12 +1,33 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. 
+ +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +from logging import basicConfig, INFO import os import pytest from CPAC.image_utils import spatial_smoothing from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.monitoring.custom_logging import getLogger import CPAC.utils.test_init as test_utils from CPAC.utils.test_mocks import configuration_strategy_mock +logger = getLogger("CPAC.image_utils.tests") +basicConfig(format="%(message)s", level=INFO) + @pytest.mark.skip(reason="needs refactoring") def test_smooth(): @@ -45,7 +66,7 @@ def test_smooth(): num_strat, c, ) - + logger.info("%s", workflow.list_node_names()) workflow.run() correlations = [] @@ -54,14 +75,14 @@ def test_smooth(): out_name1 = os.path.join( c.workingDirectory, test_name, - "_fwhm_{0}/mean_functional_smooth_0/".format(fwhm), + f"_fwhm_{fwhm}/mean_functional_smooth_0/", "sub-M10978008_ses-NFB3_task-test_bold_calc_tshift_resample_volreg_calc_tstat_maths.nii.gz", ) out_name2 = os.path.join( c.workingDirectory, test_name, - "_fwhm_{0}/mean_functional_smooth_nodes_0/".format(fwhm), + f"_fwhm_{fwhm}/mean_functional_smooth_nodes_0/", "sub-M10978008_ses-NFB3_task-test_bold_calc_tshift_resample_volreg_calc_tstat_maths.nii.gz", ) @@ -109,7 +130,7 @@ def test_smooth_mapnode(): c, input_image_type="func_derivative_multi", ) - + logger.info("%s", workflow.list_node_names()) workflow.run() correlations = [] @@ -119,10 +140,8 @@ def test_smooth_mapnode(): os.path.join( c.workingDirectory, test_name, - "_fwhm_{0}/dr_tempreg_maps_smooth_multi_0/mapflow".format(fwhm), - "_dr_tempreg_maps_smooth_multi_0{0}/temp_reg_map_000{0}_maths.nii.gz".format( - n - ), + f"_fwhm_{fwhm}/dr_tempreg_maps_smooth_multi_0/mapflow", + f"_dr_tempreg_maps_smooth_multi_0{n}/temp_reg_map_000{n}_maths.nii.gz", ) for n in range(0, 10) ] @@ -131,10 +150,8 @@ def test_smooth_mapnode(): os.path.join( c.workingDirectory, test_name, - "_fwhm_{0}/dr_tempreg_maps_smooth_nodes_multi_0/mapflow".format(fwhm), - "_dr_tempreg_maps_smooth_nodes_multi_0{0}/temp_reg_map_000{0}_maths.nii.gz".format( - n - ), + f"_fwhm_{fwhm}/dr_tempreg_maps_smooth_nodes_multi_0/mapflow", + f"_dr_tempreg_maps_smooth_nodes_multi_0{n}/temp_reg_map_000{n}_maths.nii.gz", ) for n in range(0, 10) ] diff --git a/CPAC/isc/isc.py b/CPAC/isc/isc.py index 058ef13f0a..ac491c2876 100644 --- a/CPAC/isc/isc.py +++ b/CPAC/isc/isc.py @@ -1,6 +1,23 @@ +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import numpy as np from CPAC.utils import correlation +from CPAC.utils.monitoring import IFLOGGER from .utils import p_from_null, phase_randomize @@ -46,6 +63,7 @@ def isc_significance(ISC, min_null, max_null, two_sided=False): def isc_permutation(permutation, D, masked, collapse_subj=True, random_state=0): + IFLOGGER.info("Permutation %s", permutation) min_null = 1 max_null = -1 diff --git a/CPAC/isc/isfc.py b/CPAC/isc/isfc.py index 21ba03cc23..08b18cb9c9 100644 --- a/CPAC/isc/isfc.py +++ b/CPAC/isc/isfc.py @@ -1,6 +1,23 @@ +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import numpy as np from CPAC.utils import correlation +from CPAC.utils.monitoring import IFLOGGER from .utils import p_from_null, phase_randomize @@ -48,6 +65,7 @@ def isfc_significance(ISFC, min_null, max_null, two_sided=False): def isfc_permutation(permutation, D, masked, collapse_subj=True, random_state=0): + IFLOGGER.info("Permutation %s", permutation) min_null = 1 max_null = -1 diff --git a/CPAC/longitudinal_pipeline/longitudinal_preproc.py b/CPAC/longitudinal_pipeline/longitudinal_preproc.py index 701ccd8d34..25e4d23dd8 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_preproc.py +++ b/CPAC/longitudinal_pipeline/longitudinal_preproc.py @@ -1,19 +1,37 @@ # -*- coding: utf-8 -*- +# Copyright (C) 2020-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
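# --- Illustrative sketch (not part of the patch): centralized logging ---
# The hunks above drop module-level `logging.getLogger("nipype.workflow")`
# loggers, route messages through C-PAC's IFLOGGER (from
# CPAC.utils.monitoring), and log values such as the permutation index with
# lazy %-style arguments. The snippet below sketches that pattern with the
# standard library so it runs anywhere; in C-PAC itself these calls go
# through IFLOGGER (or a test logger configured with basicConfig, as in
# test_smooth.py). All names here are examples only.
from logging import INFO, basicConfig, getLogger

logger = getLogger("CPAC.isc.example")  # stand-in for IFLOGGER
basicConfig(format="%(message)s", level=INFO)


def isc_permutation_sketch(permutation: int) -> int:
    """Toy permutation step that reports which permutation is running."""
    # Lazy %-formatting: the string is only interpolated if the record is emitted.
    logger.info("Permutation %s", permutation)
    return permutation


isc_permutation_sketch(0)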
+"""Preprocessing for longitudinal pipelines.""" from collections import Counter from multiprocessing.dummy import Pool as ThreadPool import os import numpy as np -import six import nibabel as nib from nipype.interfaces import fsl import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.monitoring import IFLOGGER from CPAC.utils.nifti_utils import nifti_image_input def read_ants_mat(ants_mat_file): + """Read a matrix, returning (translation) and (other transformations) matrices.""" if not os.path.exists(ants_mat_file): raise ValueError(str(ants_mat_file) + " does not exist.") @@ -30,9 +48,10 @@ def read_ants_mat(ants_mat_file): def read_mat(input_mat): + """Read a matrix, returning (translation) and (other transformations) matrices.""" if isinstance(input_mat, np.ndarray): mat = input_mat - elif isinstance(input_mat, six.string_types): + elif isinstance(input_mat, str): if os.path.exists(input_mat): mat = np.loadtxt(input_mat) else: @@ -46,7 +65,8 @@ def read_mat(input_mat): ) if mat.shape != (4, 4): - raise ValueError("ERROR norm_transformation: the matrix should be 4x4") + msg = "ERROR norm_transformation: the matrix should be 4x4" + raise ValueError(msg) # Translation vector translation = mat[0:3, 3] @@ -57,45 +77,48 @@ def read_mat(input_mat): def norm_transformations(translation, oth_transform): + """Calculate the sum of squares of norm translation and Frobenius norm.""" tr_norm = np.linalg.norm(translation) affine_norm = np.linalg.norm(oth_transform - np.identity(3), "fro") return pow(tr_norm, 2) + pow(affine_norm, 2) def norm_transformation(input_mat): - """ + """Calculate the sum of squares of norm translation and Frobenius norm. + Calculate the squared norm of the translation + squared Frobenium norm of the difference between other affine transformations and the identity from an fsl FLIRT transformation matrix. Parameters ---------- - input_mat : str or numpy.ndarray + input_mat : str or ~numpy.ndarray Either the path to text file matrix or a matrix already imported. Returns ------- - numpy.float64 - squared norm of the translation + squared Frobenius norm of the - difference between other affine transformations and the identity + ~numpy.float64 + squared norm of the translation + squared Frobenius norm of the + difference between other affine transformations and the identity """ if isinstance(input_mat, np.ndarray): mat = input_mat - elif isinstance(input_mat, six.string_types): + elif isinstance(input_mat, str): if os.path.exists(input_mat): mat = np.loadtxt(input_mat) else: - raise IOError( - "ERROR norm_transformation: " + input_mat + " file does not exist" - ) + msg = f"ERROR norm_transformation: {input_mat} file does not exist" + raise IOError(msg) else: - raise TypeError( - "ERROR norm_transformation: input_mat should be" - + " either a str (file_path) or a numpy.ndarray matrix" + msg = ( + "ERROR norm_transformation: input_mat should be either a str" + " (file_path) or a numpy.ndarray matrix" ) + raise TypeError(msg) if mat.shape != (4, 4): - raise ValueError("ERROR norm_transformation: the matrix should be 4x4") + msg = "ERROR norm_transformation: the matrix should be 4x4" + raise ValueError(msg) # Translation vector translation = mat[0:3, 3] @@ -109,9 +132,9 @@ def norm_transformation(input_mat): def template_convergence( mat_file, mat_type="matrix", convergence_threshold=np.finfo(np.float64).eps ): - """ - Calculate the distance between transformation matrix with a matrix of no - transformation. 
+ """Check that the deistance between matrices is smaller than the threshold. + + Calculate the distance between transformation matrix with a matrix of no transformation. Parameters ---------- @@ -127,17 +150,17 @@ def template_convergence( Returns ------- - + bool """ if mat_type == "matrix": translation, oth_transform = read_mat(mat_file) elif mat_type == "ITK": translation, oth_transform = read_ants_mat(mat_file) else: - raise ValueError( - "ERROR template_convergence: this matrix type does " + "not exist" - ) + msg = f"template_convergence: matrix type {mat_type} does not exist" + raise ValueError(msg) distance = norm_transformations(translation, oth_transform) + IFLOGGER.info("distance = %s", abs(distance)) return abs(distance) <= convergence_threshold @@ -149,9 +172,11 @@ def create_temporary_template( output_skull_path, avg_method="median", ): - """ - Average all the 3D images of the list into one 3D image - WARNING---the function assumes that all the images have the same header, + """Average all the 3D images of the list into one 3D image. + + Warnings + -------- + The function assumes that all the images have the same header, the output image will have the same header as the first image of the list. Parameters @@ -160,22 +185,23 @@ def create_temporary_template( list of brain image paths input_skull_list : list of str list of skull image paths - output_brain_path : Nifti1Image + output_brain_path : ~nibabel.Nifti1Image temporary longitudinal brain template - output_skull_path : Nifti1Image + output_skull_path : ~nibabel.Nifti1Image temporary longitudinal skull template avg_method : str function names from numpy library such as 'median', 'mean', 'std' ... Returns ------- - output_brain_path : Nifti1Image + output_brain_path : ~nibabel.Nifti1Image temporary longitudinal brain template - output_skull_path : Nifti1Image + output_skull_path : ~nibabel.Nifti1Image temporary longitudinal skull template """ if not input_brain_list or not input_skull_list: - raise ValueError("ERROR create_temporary_template: image list is empty") + msg = "ERROR create_temporary_template: image list is empty" + raise ValueError(msg) if len(input_brain_list) == 1 and len(input_skull_list) == 1: return input_brain_list[0], input_skull_list[0] @@ -246,7 +272,8 @@ def register_img_list( matrix """ if not input_brain_list: - raise ValueError("ERROR register_img_list: image list is empty") + msg = "ERROR register_img_list: image list is empty" + raise ValueError(msg) basename_list = [ str(os.path.basename(img).split(".")[0]) for img in input_brain_list @@ -331,7 +358,8 @@ def template_creation_flirt( thread_pool=2, unique_id_list=None, ): - """ + """Create a temporary template from a list of images. 
+ Parameters ---------- input_brain_list : list of str @@ -378,7 +406,8 @@ def template_creation_flirt( # return 'CECI_EST_UN_TEST' if not input_brain_list or not input_skull_list: - raise ValueError("ERROR create_temporary_template: image list is empty") + msg = "ERROR create_temporary_template: image list is empty" + raise ValueError(msg) warp_list = [] @@ -399,18 +428,17 @@ def template_creation_flirt( ) for img in input_brain_list ] - else: - if len(unique_id_list) == len(input_brain_list): - warp_list_filenames = [ - os.path.join( - os.getcwd(), - str(os.path.basename(img).split(".")[0]) - + "_" - + unique_id_list[i] - + "_anat_to_template.mat", - ) - for i, img in enumerate(input_brain_list) - ] + elif len(unique_id_list) == len(input_brain_list): + warp_list_filenames = [ + os.path.join( + os.getcwd(), + str(os.path.basename(img).split(".")[0]) + + "_" + + unique_id_list[i] + + "_anat_to_template.mat", + ) + for i, img in enumerate(input_brain_list) + ] if isinstance(thread_pool, int): pool = ThreadPool(thread_pool) @@ -421,8 +449,9 @@ def template_creation_flirt( convergence_threshold = np.finfo(np.float64).eps if len(input_brain_list) == 1 or len(input_skull_list) == 1: - warnings.warn( - "input_brain_list or input_skull_list contains only 1 image, no need to calculate template" + IFLOGGER.warning( + "input_brain_list or input_skull_list contains only 1 image, " + "no need to calculate template" ) warp_list.append(np.identity(4, dtype=float)) # return an identity matrix return ( @@ -448,7 +477,8 @@ def template_creation_flirt( ] converged = all(convergence_list) else: - raise ValueError("init_reg must be a list of FLIRT nipype nodes files") + msg = "init_reg must be a list of FLIRT nipype nodes files" + raise ValueError(msg) else: output_brain_list = input_brain_list output_skull_list = input_skull_list @@ -559,14 +589,17 @@ def template_creation_flirt( def subject_specific_template( workflow_name="subject_specific_template", method="flirt" ): - """ + """Create a subject specific template from a list of images. + Parameters ---------- - workflow_name - method + workflow_name : str + + method : str Returns ------- + template_gen_node : ~nipype.pipeline.engine.Node """ imports = [ "import os", diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py index 1ab66ec97b..43dd1eb317 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py +++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py @@ -17,7 +17,6 @@ # License along with C-PAC. If not, see . import os -from nipype import logging from nipype.interfaces import fsl import nipype.interfaces.io as nio from indi_aws import aws_utils @@ -47,8 +46,6 @@ from CPAC.utils.strategy import Strategy from CPAC.utils.utils import check_config_resources, check_prov_for_regtool -logger = logging.getLogger("nipype.workflow") - @nodeblock( name="mask_T1w_longitudinal_template", @@ -122,7 +119,8 @@ def create_datasink( ) if not s3_write_access: - raise Exception("Not able to write to bucket!") + msg = "Not able to write to bucket!" 
+ raise Exception(msg) except Exception as e: if ( @@ -140,11 +138,11 @@ def create_datasink( if map_node_iterfield is not None: ds = pe.MapNode( DataSink(infields=map_node_iterfield), - name="sinker_{}".format(datasink_name), + name=f"sinker_{datasink_name}", iterfield=map_node_iterfield, ) else: - ds = pe.Node(DataSink(), name="sinker_{}".format(datasink_name)) + ds = pe.Node(DataSink(), name=f"sinker_{datasink_name}") ds.inputs.base_directory = config.pipeline_setup["output_directory"]["path"] ds.inputs.creds_path = creds_path @@ -630,7 +628,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): ) rpool.set_data( - "from-T1w_to-longitudinal_mode-image_" "desc-linear_xfm", + "from-T1w_to-longitudinal_mode-image_desc-linear_xfm", select_sess, "warp_path", {}, diff --git a/CPAC/median_angle/median_angle.py b/CPAC/median_angle/median_angle.py index 450696fcb7..1433df8ac8 100644 --- a/CPAC/median_angle/median_angle.py +++ b/CPAC/median_angle/median_angle.py @@ -151,7 +151,8 @@ def calc_target_angle(mean_bolds, median_angles): import numpy as np if len(mean_bolds) != len(median_angles): - raise ValueError("Length of parameters do not match") + msg = "Length of parameters do not match" + raise ValueError(msg) X = np.ones((len(mean_bolds), 2)) X[:, 1] = np.array(mean_bolds) diff --git a/CPAC/network_centrality/network_centrality.py b/CPAC/network_centrality/network_centrality.py index 6fdbab18c4..e281812e38 100644 --- a/CPAC/network_centrality/network_centrality.py +++ b/CPAC/network_centrality/network_centrality.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Optional, Union -from nipype.interfaces.afni.preprocess import LFCD, DegreeCentrality +from nipype.interfaces.afni.preprocess import DegreeCentrality, LFCD from nipype.pipeline.engine import Workflow from CPAC.network_centrality.utils import ThresholdOptionError diff --git a/CPAC/network_centrality/pipeline.py b/CPAC/network_centrality/pipeline.py index 4c356f25ea..e486f8eff0 100644 --- a/CPAC/network_centrality/pipeline.py +++ b/CPAC/network_centrality/pipeline.py @@ -14,7 +14,6 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -from nipype import logging from nipype.interfaces import fsl from CPAC.network_centrality.network_centrality import create_centrality_wf @@ -23,8 +22,6 @@ from CPAC.pipeline.nodeblock import nodeblock from CPAC.pipeline.schema import valid_options -logger = logging.getLogger("nipype.workflow") - def connect_centrality_workflow( workflow, diff --git a/CPAC/network_centrality/utils.py b/CPAC/network_centrality/utils.py index baef80bb84..123509245b 100644 --- a/CPAC/network_centrality/utils.py +++ b/CPAC/network_centrality/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2023 C-PAC Developers +# Copyright (C) 2012-2024 C-PAC Developers # This file is part of C-PAC. 
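# --- Illustrative sketch (not part of the patch): message-first raises ---
# Throughout these hunks, long exception messages are assigned to a local
# `msg` variable before raising (e.g. in calc_target_angle above), and
# implicit string concatenation inside raise calls is collapsed into single
# literals or f-strings. A minimal before/after, using hypothetical values:
def check_lengths(mean_bolds: list, median_angles: list) -> None:
    """Raise if the two parameter lists differ in length."""
    # old style: raise ValueError("Length of parameters " "do not match")
    if len(mean_bolds) != len(median_angles):
        msg = "Length of parameters do not match"
        raise ValueError(msg)


check_lengths([0.1, 0.2], [30.0, 45.0])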
@@ -24,6 +24,7 @@ from CPAC.pipeline.schema import valid_options from CPAC.utils.docs import docstring_parameter from CPAC.utils.interfaces.function import Function +from CPAC.utils.monitoring import IFLOGGER from CPAC.utils.typing import ITERABLE, LIST @@ -47,7 +48,7 @@ def convert_pvalue_to_r(datafile, p_value, two_tailed=False): correlation threshold value """ import numpy as np - import nibabel as nb + import nibabel as nib import scipy.stats # Get two-tailed distribution @@ -55,7 +56,7 @@ def convert_pvalue_to_r(datafile, p_value, two_tailed=False): p_value = p_value / 2 # Load in data and number of time pts - img = nb.load(datafile).get_fdata() + img = nib.load(datafile).get_fdata() t_pts = img.shape[-1] # N-2 degrees of freedom with Pearson correlation (two sample means) @@ -261,9 +262,7 @@ def sep_nifti_subbriks( if len(nii_dims) == 3 and len(out_names) == 1: pass else: - err_msg = ( - "out_names must have same number of elements as " "nifti sub-briks" - ) + err_msg = "out_names must have same number of elements as nifti sub-briks" raise Exception(err_msg) for brik, option in enumerate(weight_options): @@ -394,6 +393,7 @@ class ThresholdError(ValueError): def __init__(self, threshold_option, threshold): self.threshold_option = threshold_option self.threshold = threshold + IFLOGGER.error("%s", type(threshold)) self.message = f"For '{threshold_option}', threshold value must be " if threshold_option in ("Significance threshold", "Sparsity threshold"): self.message += "a positive number greater than 0 " diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index 2a905ef15b..ed1ea29ee4 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2023 C-PAC Developers +# Copyright (C) 2012-2024 C-PAC Developers # This file is part of C-PAC. @@ -18,12 +18,9 @@ import numpy as np import nibabel as nib -from nipype import logging from nipype.interfaces import afni, fsl from nipype.interfaces.afni import utils as afni_utils import nipype.interfaces.utility as util - -# pylint: disable=wrong-import-order from nipype.pipeline.engine.workflows import Workflow import CPAC @@ -34,9 +31,9 @@ temporal_variance_mask, ) from CPAC.nuisance.utils.compcor import ( - TR_string_to_float, calc_compcor_components, cosine_filter, + TR_string_to_float, ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine import ResourcePool @@ -52,12 +49,11 @@ from CPAC.utils.interfaces.function import Function from CPAC.utils.interfaces.masktool import MaskTool from CPAC.utils.interfaces.pc import PC +from CPAC.utils.monitoring import IFLOGGER from CPAC.utils.typing import LITERAL, TUPLE from CPAC.utils.utils import check_prov_for_regtool from .bandpass import afni_1dBandpass, bandpass_voxels -logger = logging.getLogger("nipype.workflow") - def choose_nuisance_blocks(cfg, rpool, generate_only=False): """ @@ -125,7 +121,7 @@ def form_mask_erosion_prop(erosion_prop): ero_imports = [ "import scipy.ndimage as nd", "import numpy as np", - "import nibabel as nb", + "import nibabel as nib", "import os", "from CPAC.seg_preproc.utils import _erode", ] @@ -232,31 +228,31 @@ def gather_nuisance( not functional_file_path.endswith(".nii") and not functional_file_path.endswith(".nii.gz") ): - raise ValueError( - "Invalid value for input_file ({}). Should be a nifti " - "file and should exist".format(functional_file_path) + msg = ( + f"Invalid value for input_file ({functional_file_path}). 
Should be a nifti " + "file and should exist" ) + raise ValueError(msg) try: functional_image = nib.load(functional_file_path) except: - raise ValueError( - "Invalid value for input_file ({}). Should be a nifti " - "file and should exist".format(functional_file_path) + msg = ( + f"Invalid value for input_file ({functional_file_path}). Should be a nifti " + "file and should exist" ) + raise ValueError(msg) if len(functional_image.shape) < 4 or functional_image.shape[3] < 2: - raise ValueError( - "Invalid input_file ({}). Expected 4D file.".format(functional_file_path) - ) + msg = f"Invalid input_file ({functional_file_path}). Expected 4D file." + raise ValueError(msg) regressor_length = functional_image.shape[3] # selector = selector.selector if not isinstance(selector, dict): - raise ValueError( - "Invalid type for selectors {0}, expecting dict".format(type(selector)) - ) + msg = f"Invalid type for selectors {type(selector)}, expecting dict" + raise ValueError(msg) regressor_files = { "aCompCor": acompcor_file_path, @@ -299,27 +295,25 @@ def gather_nuisance( } if not regressor_file or not os.path.isfile(regressor_file): - raise ValueError( - "Regressor type {0} specified in selectors " - "but the corresponding file was not found!".format(regressor_type) + msg = ( + f"Regressor type {regressor_type} specified in selectors " + "but the corresponding file was not found!" ) + raise ValueError(msg) try: regressors = np.loadtxt(regressor_file) - except: - raise + except (OSError, TypeError, UnicodeDecodeError, ValueError) as error: + msg = f"Could not read regressor {regressor_type} from {regressor_file}." + raise OSError(msg) from error if regressors.shape[0] != regressor_length: - raise ValueError( - "Number of time points in {0} ({1}) is " + msg = ( + f"Number of time points in {regressor_file} ({regressors.shape[0]}) is " "inconsistent with length of functional " - "file {2} ({3})".format( - regressor_file, - regressors.shape[0], - functional_file_path, - regressor_length, - ) + f"file {functional_file_path} ({regressor_length})" ) + raise ValueError(msg) if regressor_type == "Motion": num_regressors = 6 @@ -334,12 +328,11 @@ def gather_nuisance( regressors = regressors[:, 0:num_regressors] if regressors.shape[1] != num_regressors: - raise ValueError( - "Expecting {0} regressors for {1}, but " - "found {2} in file {3}.".format( - num_regressors, regressor_type, regressors.shape[1], regressor_file - ) + msg = ( + f"Expecting {num_regressors} regressors for {regressor_type}, but " + f"found {regressors.shape[1]} in file {regressor_file}." 
) + raise ValueError(msg) # Add in the regressors, making sure to also add in the column name for regressor_index in range(regressors.shape[1]): @@ -350,37 +343,35 @@ def gather_nuisance( if type(summary_method) is dict: summary_method = summary_method["method"] - regressor_name = "{0}{1}{2}".format( - regressor_type, summary_method, regressor_index - ) + regressor_name = f"{regressor_type}{summary_method}{regressor_index}" column_names.append(regressor_name) nuisance_regressors.append(regressors[:, regressor_index]) if regressor_selector.get("include_delayed", False): - column_names.append("{0}Delay".format(regressor_name)) + column_names.append(f"{regressor_name}Delay") nuisance_regressors.append( np.append([0.0], regressors[0:-1, regressor_index]) ) if regressor_selector.get("include_backdiff", False): - column_names.append("{0}BackDiff".format(regressor_name)) + column_names.append(f"{regressor_name}BackDiff") nuisance_regressors.append( np.append([0.0], np.diff(regressors[:, regressor_index], n=1)) ) if regressor_selector.get("include_squared", False): - column_names.append("{0}Sq".format(regressor_name)) + column_names.append(f"{regressor_name}Sq") nuisance_regressors.append(np.square(regressors[:, regressor_index])) if regressor_selector.get("include_delayed_squared", False): - column_names.append("{0}DelaySq".format(regressor_name)) + column_names.append(f"{regressor_name}DelaySq") nuisance_regressors.append( np.square(np.append([0.0], regressors[0:-1, regressor_index])) ) if regressor_selector.get("include_backdiff_squared", False): - column_names.append("{0}BackDiffSq".format(regressor_name)) + column_names.append(f"{regressor_name}BackDiffSq") nuisance_regressors.append( np.square( np.append([0.0], np.diff(regressors[:, regressor_index], n=1)) @@ -393,18 +384,18 @@ def gather_nuisance( try: custom_regressor = np.loadtxt(custom_file_path) except: - raise ValueError( - "Could not read regressor {0} from {1}.".format( - "Custom", custom_file_path - ) + msg = "Could not read regressor {0} from {1}.".format( + "Custom", custom_file_path ) + raise ValueError(msg) if len(custom_regressor.shape) > 1 and custom_regressor.shape[1] > 1: - raise ValueError( + msg = ( "Invalid format for censor file {0}, should be a single " "column containing 1s for volumes to keep and 0s for volumes " "to censor.".format(custom_file_path) ) + raise ValueError(msg) column_names.append(custom_file_path) nuisance_regressors.append(custom_regressor) @@ -417,37 +408,42 @@ def gather_nuisance( regressor_file = censor_file_path if not regressor_file: - # ↓ This section is gross and temporary ↓ - len(selector["thresholds"]) - [thresh.get("value") for thresh in selector["thresholds"]] - # ↑ This section is gross and temporary ↑ + num_thresh = len(selector["thresholds"]) + IFLOGGER.warning( + "%s Censor specified with %sthreshold%s %s in selectors but threshold" + " was not reached.", + selector["method"], + "no " if num_thresh == 0 else "", + "" if num_thresh == 1 else "s", + [thresh.get("value") for thresh in selector["thresholds"]], + ) # All good to pass through if nothing to censor censor_volumes = np.ones((regressor_length,), dtype=int) else: try: censor_volumes = np.loadtxt(regressor_file) except: - raise ValueError( - "Could not read regressor {0} from {1}.".format( - regressor_type, regressor_file - ) + msg = ( + f"Could not read regressor {regressor_type} from {regressor_file}." 
) + raise ValueError(msg) if ( len(censor_volumes.shape) > 1 and censor_volumes.shape[1] > 1 ) or not np.all(np.isin(censor_volumes, [0, 1])): - raise ValueError( - "Invalid format for censor file {0}, should be a single " + msg = ( + f"Invalid format for censor file {regressor_file}, should be a single " "column containing 1s for volumes to keep and 0s for volumes " - "to censor.".format(regressor_file) + "to censor." ) + raise ValueError(msg) censor_volumes = censor_volumes.flatten() censor_indices = np.where(censor_volumes == 0)[0] out_of_range_censors = censor_indices >= regressor_length if np.any(out_of_range_censors): - raise ValueError( + msg = ( "Censor volumes {0} are out of range" "on censor file {1}, calculated " "regressor length is {2}".format( @@ -456,6 +452,7 @@ def gather_nuisance( regressor_length, ) ) + raise ValueError(msg) if len(censor_indices) > 0: # if number_of_previous_trs_to_censor and number_of_subsequent_trs_to_censor @@ -480,7 +477,7 @@ def gather_nuisance( spike_regressors[censor_begin_index : censor_end_index + 1] = 1 for censor_index in np.where(spike_regressors == 1)[0]: - column_names.append("SpikeRegression{0}".format(censor_index)) + column_names.append(f"SpikeRegression{censor_index}") spike_regressor_index = np.zeros(regressor_length) spike_regressor_index[censor_index] = 1 nuisance_regressors.append(spike_regressor_index.flatten()) @@ -493,7 +490,7 @@ def gather_nuisance( with open(output_file_path, "w") as ofd: # write out the header information - ofd.write("# C-PAC {0}\n".format(CPAC.__version__)) + ofd.write(f"# C-PAC {CPAC.__version__}\n") ofd.write("# Nuisance regressors:\n") ofd.write("# " + "\t".join(column_names) + "\n") @@ -899,7 +896,7 @@ def create_regressor_workflow( custom_check_s3_node, "local_path", custom_ort_merge, - "in{}".format(i + 1), + f"in{i + 1}", ) pipeline_resource_pool["custom_ort_file_paths"] = ( @@ -922,7 +919,7 @@ def create_regressor_workflow( custom_check_s3_node, "local_path", custom_dsort_convolve_merge, - "in{}".format(i + 1), + f"in{i + 1}", ) if len(custom_dsort_check_s3_nodes) > 0: @@ -939,7 +936,7 @@ def create_regressor_workflow( custom_check_s3_node, "local_path", custom_dsort_merge, - "in{}".format(i + 1), + f"in{i + 1}", ) if len(custom_dsort_convolve_nodes) > 0: @@ -947,7 +944,7 @@ def create_regressor_workflow( custom_dsort_convolve_merge, "out", custom_dsort_merge, - "in{}".format(i + 1), + f"in{i + 1}", ) pipeline_resource_pool["custom_dsort_file_paths"] = ( @@ -971,12 +968,13 @@ def create_regressor_workflow( regressor_selector["summary"] = {} if type(regressor_selector["summary"]) is not dict: - raise ValueError( + msg = ( "Regressor {0} requires PC summary method, " "but {1} specified".format( regressor_type, regressor_selector["summary"] ) ) + raise ValueError(msg) if not regressor_selector["summary"].get("components"): regressor_selector["summary"]["components"] = 1 @@ -991,17 +989,15 @@ def create_regressor_workflow( if regressor_type == "aCompCor": if not regressor_selector.get("tissues"): - raise ValueError( - "Tissue type required for aCompCor, " "but none specified" - ) + msg = "Tissue type required for aCompCor, but none specified" + raise ValueError(msg) regressor_descriptor = {"tissue": regressor_selector["tissues"]} if regressor_type == "tCompCor": if not regressor_selector.get("threshold"): - raise ValueError( - "Threshold required for tCompCor, " "but none specified." - ) + msg = "Threshold required for tCompCor, but none specified." 
+ raise ValueError(msg) regressor_descriptor = { "tissue": "FunctionalVariance-{}".format( @@ -1121,19 +1117,17 @@ def create_regressor_workflow( regressor_descriptor["erosion"] = "Eroded" if not regressor_selector.get("summary"): - raise ValueError( - "Summary method required for {0}, " "but none specified".format( - regressor_type - ) + msg = ( + f"Summary method required for {regressor_type}, but none specified" ) + raise ValueError(msg) regressor_descriptor["extraction"] = regressor_selector["summary"]["method"] if regressor_descriptor["extraction"] in ["DetrendPC", "PC"]: if not regressor_selector["summary"].get("components"): - raise ValueError( - "Summary method PC requires components, " "but received none." - ) + msg = "Summary method PC requires components, but received none." + raise ValueError(msg) regressor_descriptor["extraction"] += "_{0}".format( regressor_selector["summary"]["components"] @@ -1157,7 +1151,7 @@ def create_regressor_workflow( if anatomical_at_resolution_key not in pipeline_resource_pool: anat_resample = pe.Node( interface=fsl.FLIRT(), - name="{}_flirt".format(anatomical_at_resolution_key), + name=f"{anatomical_at_resolution_key}_flirt", mem_gb=3.63, mem_x=(3767129957844731 / 1208925819614629174706176, "in_file"), ) @@ -1187,7 +1181,7 @@ def create_regressor_workflow( if functional_at_resolution_key not in pipeline_resource_pool: func_resample = pe.Node( interface=fsl.FLIRT(), - name="{}_flirt".format(functional_at_resolution_key), + name=f"{functional_at_resolution_key}_flirt", mem_gb=0.521, mem_x=(4394984950818853 / 302231454903657293676544, "in_file"), ) @@ -1271,7 +1265,7 @@ def create_regressor_workflow( # Merge mask paths to extract voxel timeseries merge_masks_paths = pe.Node( util.Merge(len(regressor_mask_file_resource_keys)), - name="{}_merge_masks".format(regressor_type), + name=f"{regressor_type}_merge_masks", ) for i, regressor_mask_file_resource_key in enumerate( regressor_mask_file_resource_keys @@ -1281,12 +1275,12 @@ def create_regressor_workflow( ] nuisance_wf.connect( - node, node_output, merge_masks_paths, "in{}".format(i + 1) + node, node_output, merge_masks_paths, f"in{i + 1}" ) union_masks_paths = pe.Node( MaskTool(outputtype="NIFTI_GZ"), - name="{}_union_masks".format(regressor_type), + name=f"{regressor_type}_union_masks", mem_gb=2.1, mem_x=(1708448960473801 / 1208925819614629174706176, "in_files"), ) @@ -1314,7 +1308,7 @@ def create_regressor_workflow( compcor_imports = [ "import os", "import scipy.signal as signal", - "import nibabel as nb", + "import nibabel as nib", "import numpy as np", "from CPAC.utils import safe_shape", ] @@ -1330,7 +1324,7 @@ def create_regressor_workflow( function=calc_compcor_components, imports=compcor_imports, ), - name="{}_DetrendPC".format(regressor_type), + name=f"{regressor_type}_DetrendPC", mem_gb=0.4, mem_x=( 3811976743057169 / 151115727451828646838272, @@ -1360,7 +1354,7 @@ def create_regressor_workflow( cosfilter_imports = [ "import os", "import numpy as np", - "import nibabel as nb", + "import nibabel as nib", "from nipype import logging", ] @@ -1371,7 +1365,7 @@ def create_regressor_workflow( function=cosine_filter, imports=cosfilter_imports, ), - name="{}_cosine_filter".format(regressor_type), + name=f"{regressor_type}_cosine_filter", mem_gb=8.0, ) nuisance_wf.connect( @@ -1386,7 +1380,7 @@ def create_regressor_workflow( output_names=["tr_float"], function=TR_string_to_float, ), - name="{}_tr_string2float".format(regressor_type), + name=f"{regressor_type}_tr_string2float", ) 
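# --- Illustrative sketch (not part of the patch): f-string node names and
# the `nib` alias in Function-node imports ---
# The hunks above rename nodes with f-strings (e.g. name=f"{regressor_type}_pc")
# and standardize the nibabel alias to `nib` inside the `imports` lists that
# nipype Function interfaces execute. A minimal, hypothetical node built the
# same way; assumes nipype and C-PAC are installed, and the function and
# names below are examples only, not part of the patch.
from CPAC.pipeline import nipype_pipeline_engine as pe
from CPAC.utils.interfaces.function import Function


def count_volumes(in_file):
    """Return the number of volumes in a 4D NIfTI file."""
    img = nib.load(in_file)  # `nib` is provided by the imports list below
    return int(img.shape[3])


regressor_type = "aCompCor"  # hypothetical
count_node = pe.Node(
    Function(
        input_names=["in_file"],
        output_names=["n_vols"],
        function=count_volumes,
        imports=["import nibabel as nib"],  # was "import nibabel as nb"
    ),
    name=f"{regressor_type}_count_volumes",
)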
nuisance_wf.connect(inputspec, "tr", tr_string2float_node, "tr") @@ -1400,7 +1394,7 @@ def create_regressor_workflow( if "Detrend" in summary_method: detrend_node = pe.Node( afni.Detrend(args="-polort 1", outputtype="NIFTI"), - name="{}_detrend".format(regressor_type), + name=f"{regressor_type}_detrend", ) nuisance_wf.connect( @@ -1415,7 +1409,7 @@ def create_regressor_workflow( if "Norm" in summary_method: l2norm_node = pe.Node( afni.TStat(args="-l2norm", outputtype="NIFTI"), - name="{}_l2norm".format(regressor_type), + name=f"{regressor_type}_l2norm", ) nuisance_wf.connect( summary_method_input[0], @@ -1429,7 +1423,7 @@ def create_regressor_workflow( norm_node = pe.Node( afni.Calc(expr="a/b", outputtype="NIFTI"), - name="{}_norm".format(regressor_type), + name=f"{regressor_type}_norm", mem_gb=1.7, mem_x=( 1233286593342025 / 151115727451828646838272, @@ -1451,7 +1445,7 @@ def create_regressor_workflow( if "Mean" in summary_method: mean_node = pe.Node( afni.ROIStats(quiet=False, args="-1Dformat"), - name="{}_mean".format(regressor_type), + name=f"{regressor_type}_mean", mem_gb=5.0, ) @@ -1471,7 +1465,7 @@ def create_regressor_workflow( if "PC" in summary_method: std_node = pe.Node( afni.TStat(args="-nzstdev", outputtype="NIFTI"), - name="{}_std".format(regressor_type), + name=f"{regressor_type}_std", ) nuisance_wf.connect( summary_method_input[0], @@ -1485,7 +1479,7 @@ def create_regressor_workflow( standardized_node = pe.Node( afni.Calc(expr="a/b", outputtype="NIFTI"), - name="{}_standardized".format(regressor_type), + name=f"{regressor_type}_standardized", ) nuisance_wf.connect( summary_method_input[0], @@ -1503,7 +1497,7 @@ def create_regressor_workflow( pcs=regressor_selector["summary"]["components"], outputtype="NIFTI_GZ", ), - name="{}_pc".format(regressor_type), + name=f"{regressor_type}_pc", ) nuisance_wf.connect( @@ -1621,7 +1615,7 @@ def create_regressor_workflow( node, node_output = regressor_node nuisance_wf.connect( - node, node_output, voxel_nuisance_regressors_merge, "in{}".format(i + 1) + node, node_output, voxel_nuisance_regressors_merge, f"in{i + 1}" ) nuisance_wf.connect( @@ -1667,12 +1661,13 @@ def create_nuisance_regression_workflow(nuisance_selectors, name="nuisance_regre censor_selector = nuisance_selectors.get("Censor") if censor_selector.get("method") not in censor_methods: - raise ValueError( + msg = ( "Improper censoring method specified ({0}), " "should be one of {1}.".format( censor_selector.get("method"), censor_methods ) ) + raise ValueError(msg) find_censors = pe.Node( Function( @@ -1694,7 +1689,8 @@ def create_nuisance_regression_workflow(nuisance_selectors, name="nuisance_regre ) if not censor_selector.get("thresholds"): - raise ValueError("Censoring requested, but thresh_metric not provided.") + msg = "Censoring requested, but thresh_metric not provided." + raise ValueError(msg) for threshold in censor_selector["thresholds"]: if "type" not in threshold or threshold["type"] not in [ @@ -1702,12 +1698,12 @@ def create_nuisance_regression_workflow(nuisance_selectors, name="nuisance_regre "FD_J", "FD_P", ]: - raise ValueError( - "Censoring requested, but with invalid threshold type." - ) + msg = "Censoring requested, but with invalid threshold type." + raise ValueError(msg) if "value" not in threshold: - raise ValueError("Censoring requested, but threshold not provided.") + msg = "Censoring requested, but threshold not provided." 
+ raise ValueError(msg) if threshold["type"] == "FD_J": find_censors.inputs.fd_j_threshold = threshold["value"] @@ -1776,9 +1772,8 @@ def create_nuisance_regression_workflow(nuisance_selectors, name="nuisance_regre if nuisance_selectors.get("PolyOrt"): if not nuisance_selectors["PolyOrt"].get("degree"): - raise ValueError( - "Polynomial orthogonalization requested, " "but degree not provided." - ) + msg = "Polynomial orthogonalization requested, but degree not provided." + raise ValueError(msg) nuisance_regression.inputs.polort = nuisance_selectors["PolyOrt"]["degree"] @@ -2484,8 +2479,8 @@ def nuisance_regressors_generation( ventricle = strat_pool.check_rpool("lateral-ventricles-mask") csf_mask = strat_pool.check_rpool( [ - f"{prefixes[0]}label-CSF_" "desc-eroded_mask", - f"{prefixes[0]}label-CSF_" "desc-preproc_mask", + f"{prefixes[0]}label-CSF_desc-eroded_mask", + f"{prefixes[0]}label-CSF_desc-preproc_mask", f"{prefixes[0]}label-CSF_mask", ] ) @@ -2519,7 +2514,7 @@ def nuisance_regressors_generation( node, out, regressors, "inputspec.anatomical_eroded_brain_mask_file_path" ) else: - logger.warning("No %s-space brain mask found in resource pool.", space) + IFLOGGER.warning("No %s-space brain mask found in resource pool.", space) if strat_pool.check_rpool( [ @@ -2530,14 +2525,14 @@ def nuisance_regressors_generation( ): node, out = strat_pool.get_data( [ - f"{prefixes[0]}label-CSF_" "desc-eroded_mask", - f"{prefixes[0]}label-CSF_" "desc-preproc_mask", + f"{prefixes[0]}label-CSF_desc-eroded_mask", + f"{prefixes[0]}label-CSF_desc-preproc_mask", f"{prefixes[0]}label-CSF_mask", ] ) wf.connect(node, out, regressors, "inputspec.csf_mask_file_path") else: - logger.warning("No %s-space CSF mask found in resource pool.", space) + IFLOGGER.warning("No %s-space CSF mask found in resource pool.", space) if strat_pool.check_rpool( [ @@ -2548,14 +2543,14 @@ def nuisance_regressors_generation( ): node, out = strat_pool.get_data( [ - f"{prefixes[0]}label-WM_" "desc-eroded_mask", - f"{prefixes[0]}label-WM_" "desc-preproc_mask", + f"{prefixes[0]}label-WM_desc-eroded_mask", + f"{prefixes[0]}label-WM_desc-preproc_mask", f"{prefixes[0]}label-WM_mask", ] ) wf.connect(node, out, regressors, "inputspec.wm_mask_file_path") else: - logger.warning("No %s-space WM mask found in resource pool.", space) + IFLOGGER.warning("No %s-space WM mask found in resource pool.", space) if strat_pool.check_rpool( [ @@ -2566,14 +2561,14 @@ def nuisance_regressors_generation( ): node, out = strat_pool.get_data( [ - f"{prefixes[0]}label-GM_" "desc-eroded_mask", - f"{prefixes[0]}label-GM_" "desc-preproc_mask", + f"{prefixes[0]}label-GM_desc-eroded_mask", + f"{prefixes[0]}label-GM_desc-preproc_mask", f"{prefixes[0]}label-GM_mask", ] ) wf.connect(node, out, regressors, "inputspec.gm_mask_file_path") else: - logger.warning("No %s-space GM mask found in resource pool.", space) + IFLOGGER.warning("No %s-space GM mask found in resource pool.", space) if ventricle: node, out = strat_pool.get_data("lateral-ventricles-mask") @@ -2868,7 +2863,7 @@ def ingress_regressors(wf, cfg, strat_pool, pipe_num, opt=None): # Will need to generalize the name node, out = strat_pool.get_data("pipeline-ingress_desc-confounds_timeseries") if not regressors_list: - logger.warning( + IFLOGGER.warning( "\n[!] Ingress regressors is on, but no regressors provided. " "The whole regressors file will be applied, but it may be" "too large for the timeseries data!" 
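# --- Illustrative sketch (not part of the patch): avoiding implicit
# string concatenation ---
# Several hunks above collapse adjacent string literals such as
# f"{prefixes[0]}label-CSF_" "desc-eroded_mask" into a single f-string.
# Adjacent literals silently join with no separator, which is easy to get
# wrong; a single literal keeps the resource key readable and exact.
# The prefixes below are hypothetical stand-ins:
prefixes = ["space-T1w_", "space-bold_"]

implicit = f"{prefixes[0]}label-CSF_" "desc-eroded_mask"
single = f"{prefixes[0]}label-CSF_desc-eroded_mask"
assert implicit == single == "space-T1w_label-CSF_desc-eroded_mask"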
@@ -2880,8 +2875,6 @@ def ingress_regressors(wf, cfg, strat_pool, pipe_num, opt=None): "import numpy as np", "import os", "import CPAC", - "from nipype import logging", - 'logger = logging.getLogger("nipype.workflow")', ] ingress_regressors = pe.Node( Function( @@ -2932,24 +2925,26 @@ def parse_regressors(regressors_file, regressors_list): header.append(regressor) parsed_regressors[regressor] = full_file.loc[:, regressor] else: - logger.warning( + IFLOGGER.warning( f"\n[!] Regressor {regressor} not found in {regressors_file}" ) if parsed_regressors.empty: - raise Exception(f"Regressors not found in {regressors_file}") + msg = f"Regressors not found in {regressors_file}" + raise Exception(msg) regressors_path = os.path.join(os.getcwd(), "parsed_regressors.1D") parsed_regressors = parsed_regressors.to_numpy() check_vals = np.any(np.isnan(parsed_regressors)) if check_vals: - raise Exception( + msg = ( '\n[!] This regressors file contains "N/A" values.\n' "[!] Please choose a different dataset or " "remove regressors with those values." ) + raise Exception(msg) with open(regressors_path, "w") as ofd: # write out the header information - ofd.write("# C-PAC {0}\n".format(CPAC.__version__)) + ofd.write(f"# C-PAC {CPAC.__version__}\n") ofd.write("# Ingressed nuisance regressors:\n") np.savetxt(ofd, parsed_regressors, fmt="%.18f", delimiter="\t") diff --git a/CPAC/nuisance/tests/test_utils.py b/CPAC/nuisance/tests/test_utils.py index 3c0be4f48d..724d536b63 100644 --- a/CPAC/nuisance/tests/test_utils.py +++ b/CPAC/nuisance/tests/test_utils.py @@ -1,3 +1,4 @@ +from logging import basicConfig, INFO import os import tempfile @@ -6,6 +7,10 @@ import pytest from CPAC.nuisance.utils import calc_compcor_components, find_offending_time_points +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC.nuisance.tests") +basicConfig(format="%(message)s", level=INFO) mocked_outputs = p.resource_filename( "CPAC", os.path.join("nuisance", "tests", "motion_statistics") @@ -36,6 +41,6 @@ def test_calc_compcor_components(): data_filename = "/cc_dev/cpac_working/old_compcor/nuisance_0_0/_scan_test/_selector_CSF-2mmE-M_aC-WM-2mm-DPC5_G-M_M-SDB_P-2_BP-B0.01-T0.1/Functional_2mm_flirt/sub-M10978008_ses-NFB3_task-test_bold_calc_tshift_resample_volreg_calc_maths_flirt.nii.gz" mask_filename = "/cc_dev/cpac_working/old_compcor/nuisance_0_0/_scan_test/_selector_CSF-2mmE-M_aC-WM-2mm-DPC5_G-M_M-SDB_P-2_BP-B0.01-T0.1/aCompCor_union_masks/segment_seg_2_maths_flirt_mask.nii.gz" - calc_compcor_components(data_filename, 5, mask_filename) - + compcor_filename = calc_compcor_components(data_filename, 5, mask_filename) + logger.info("compcor components written to %s", compcor_filename) assert 0 == 1 diff --git a/CPAC/nuisance/utils/__init__.py b/CPAC/nuisance/utils/__init__.py index 4bb9d55505..cf0048ca3d 100644 --- a/CPAC/nuisance/utils/__init__.py +++ b/CPAC/nuisance/utils/__init__.py @@ -1,837 +1,34 @@ -from collections import OrderedDict -import os -import re - -from nipype import logging -from nipype.interfaces import afni, ants, fsl -import nipype.interfaces.utility as util - -from CPAC.nuisance.utils.crc import encode as crc_encode -from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.registration.utils import generate_inverse_transform_flags -from CPAC.utils.interfaces.fsl import Merge as fslMerge -from CPAC.utils.interfaces.function import Function - -logger = logging.getLogger("nipype.workflow") - - -def find_offending_time_points( - fd_j_file_path=None, - fd_p_file_path=None, - 
dvars_file_path=None, - fd_j_threshold=None, - fd_p_threshold=None, - dvars_threshold=None, - number_of_previous_trs_to_censor=0, - number_of_subsequent_trs_to_censor=0, -): - """ - Applies criterion in method to find time points whose FD or DVARS (or both) - are above threshold. - - :param fd_j_file_path: path to TSV containing framewise displacement as a - single column. If not specified, it will not be used. - :param fd_p_file_path: path to TSV containing framewise displacement as a - single column. If not specified, it will not be used. - :param dvars_file_path: path to TSV containing DVARS as a single column. - If not specified, it will not be used. - :param fd_j_threshold: threshold to apply to framewise displacement (Jenkinson), - it can be a value such as 0.2 or a floating point multiple of the - standard deviation specified as, e.g. '1.5SD'. - :param fd_p_threshold: threshold to apply to framewise displacement (Power), - it can be a value such as 0.2 or a floating point multiple of the - standard deviation specified as, e.g. '1.5SD'. - :param dvars_threshold: threshold to apply to DVARS, can be a value such - as 0.5 or a floating point multiple of the standard deviation specified - as, e.g. '1.5SD'. - :param number_of_previous_trs_to_censor: extent of censorship window before - the censor. - :param number_of_subsequent_trs_to_censor: extent of censorship window after - the censor. - - :return: File path to TSV file containing the volumes to be censored. - """ - import os - import re - - import numpy as np - - offending_time_points = set() - time_course_len = 0 - - types = ["FDJ", "FDP", "DVARS"] - file_paths = [fd_j_file_path, fd_p_file_path, dvars_file_path] - thresholds = [fd_j_threshold, fd_p_threshold, dvars_threshold] - # types = ['FDP', 'DVARS'] - # file_paths = [fd_p_file_path, dvars_file_path] - # thresholds = [fd_p_threshold, dvars_threshold] - - for type, file_path, threshold in zip(types, file_paths, thresholds): - if not file_path: - continue - - if not os.path.isfile(file_path): - raise ValueError("File {0} could not be found.".format(file_path)) - - if not threshold: - raise ValueError( - "Method requires the specification of a threshold, none received" - ) - - metric = np.loadtxt(file_path) - if type == "DVARS": - metric = np.array([0.0, *metric.tolist()]) - - if not time_course_len: - time_course_len = metric.shape[0] - else: - assert ( - time_course_len == metric.shape[0] - ), "Threshold metric files does not have same size." 
- - try: - threshold_sd = re.match(r"([0-9]*\.*[0-9]*)\s*SD", str(threshold)) - - if threshold_sd: - threshold_sd = float(threshold_sd.groups()[0]) - threshold = metric.mean() + threshold_sd * metric.std() - else: - threshold = float(threshold) - except: - raise ValueError( - "Could not translate threshold {0} into a " "meaningful value".format( - threshold - ) - ) - - offending_time_points |= set(np.where(metric > threshold)[0].tolist()) - - extended_censors = [] - for censor in offending_time_points: - extended_censors += list( - range( - (censor - number_of_previous_trs_to_censor), - (censor + number_of_subsequent_trs_to_censor + 1), - ) - ) - - extended_censors = [ - censor - for censor in np.unique(extended_censors) - if 0 <= censor < time_course_len - ] - - censor_vector = np.ones((time_course_len, 1)) - censor_vector[extended_censors] = 0 - - out_file_path = os.path.join(os.getcwd(), "censors.tsv") - np.savetxt(out_file_path, censor_vector, fmt="%d", header="censor", comments="") - - return out_file_path - - -def compute_threshold(in_file, mask, threshold): - return threshold - - -def compute_pct_threshold(in_file, mask, threshold_pct): - import numpy as np - import nibabel as nib - - m = nib.load(mask).get_fdata().astype(bool) - if not np.any(m): - return 0.0 - d = nib.load(in_file).get_fdata()[m] - return np.percentile(d, 100.0 - threshold_pct) - - -def compute_sd_threshold(in_file, mask, threshold_sd): - import numpy as np - import nibabel as nib - - m = nib.load(mask).get_fdata().astype(bool) - if not np.any(m): - return 0.0 - d = nib.load(in_file).get_fdata()[m] - return d.mean() + threshold_sd * d.std() - - -def temporal_variance_mask(threshold, by_slice=False, erosion=False, degree=1): - threshold_method = "VAR" - - if isinstance(threshold, str): - regex_match = { - "SD": r"([0-9]+(\.[0-9]+)?)\s*SD", - "PCT": r"([0-9]+(\.[0-9]+)?)\s*PCT", - } - - for method, regex in regex_match.items(): - matched = re.match(regex, threshold) - if matched: - threshold_method = method - threshold_value = matched.groups()[0] - - try: - threshold_value = float(threshold_value) - except: - raise ValueError( - "Error converting threshold value {0} from {1} to a " - "floating point number. 
The threshold value can " - "contain SD or PCT for selecting a threshold based on " - "the variance distribution, otherwise it should be a " - "floating point number.".format(threshold_value, threshold) - ) - - if threshold_value < 0: - raise ValueError( - "Threshold value should be positive, instead of {0}.".format( - threshold_value - ) - ) - - if threshold_method == "PCT" and threshold_value >= 100.0: - raise ValueError( - "Percentile should be less than 100, received {0}.".format(threshold_value) - ) - - threshold = threshold_value - - wf = pe.Workflow(name="tcompcor") - - input_node = pe.Node( - util.IdentityInterface(fields=["functional_file_path", "mask_file_path"]), - name="inputspec", - ) - output_node = pe.Node(util.IdentityInterface(fields=["mask"]), name="outputspec") - - # C-PAC default performs linear regression while nipype performs quadratic regression - detrend = pe.Node( - afni.Detrend(args="-polort {0}".format(degree), outputtype="NIFTI"), - name="detrend", - ) - wf.connect(input_node, "functional_file_path", detrend, "in_file") - - std = pe.Node(afni.TStat(args="-nzstdev", outputtype="NIFTI"), name="std") - wf.connect(input_node, "mask_file_path", std, "mask") - wf.connect(detrend, "out_file", std, "in_file") - - var = pe.Node(afni.Calc(expr="a*a", outputtype="NIFTI"), name="var") - wf.connect(std, "out_file", var, "in_file_a") - - if by_slice: - slices = pe.Node(fsl.Slice(), name="slicer") - wf.connect(var, "out_file", slices, "in_file") - - mask_slices = pe.Node(fsl.Slice(), name="mask_slicer") - wf.connect(input_node, "mask_file_path", mask_slices, "in_file") - - mapper = pe.MapNode( - util.IdentityInterface(fields=["out_file", "mask_file"]), - name="slice_mapper", - iterfield=["out_file", "mask_file"], - ) - wf.connect(slices, "out_files", mapper, "out_file") - wf.connect(mask_slices, "out_files", mapper, "mask_file") - - else: - mapper_list = pe.Node(util.Merge(1), name="slice_mapper_list") - wf.connect(var, "out_file", mapper_list, "in1") - - mask_mapper_list = pe.Node(util.Merge(1), name="slice_mask_mapper_list") - wf.connect(input_node, "mask_file_path", mask_mapper_list, "in1") - - mapper = pe.Node( - util.IdentityInterface(fields=["out_file", "mask_file"]), - name="slice_mapper", - ) - wf.connect(mapper_list, "out", mapper, "out_file") - wf.connect(mask_mapper_list, "out", mapper, "mask_file") - - if threshold_method == "PCT": - threshold_node = pe.MapNode( - Function( - input_names=["in_file", "mask", "threshold_pct"], - output_names=["threshold"], - function=compute_pct_threshold, - as_module=True, - ), - name="threshold_value", - iterfield=["in_file", "mask"], - ) - threshold_node.inputs.threshold_pct = threshold_value - wf.connect(mapper, "out_file", threshold_node, "in_file") - wf.connect(mapper, "mask_file", threshold_node, "mask") - - elif threshold_method == "SD": - threshold_node = pe.MapNode( - Function( - input_names=["in_file", "mask", "threshold_sd"], - output_names=["threshold"], - function=compute_sd_threshold, - as_module=True, - ), - name="threshold_value", - iterfield=["in_file", "mask"], - ) - threshold_node.inputs.threshold_sd = threshold_value - wf.connect(mapper, "out_file", threshold_node, "in_file") - wf.connect(mapper, "mask_file", threshold_node, "mask") - - else: - threshold_node = pe.MapNode( - Function( - input_names=["in_file", "mask", "threshold"], - output_names=["threshold"], - function=compute_threshold, - as_module=True, - ), - name="threshold_value", - iterfield=["in_file", "mask"], - ) - threshold_node.inputs.threshold = 
threshold_value - wf.connect(mapper, "out_file", threshold_node, "in_file") - wf.connect(mapper, "mask_file", threshold_node, "mask") - - threshold_mask = pe.MapNode( - interface=fsl.maths.Threshold(), - name="threshold", - iterfield=["in_file", "thresh"], - ) - threshold_mask.inputs.args = "-bin" - wf.connect(mapper, "out_file", threshold_mask, "in_file") - wf.connect(threshold_node, "threshold", threshold_mask, "thresh") - - merge_slice_masks = pe.Node(interface=fslMerge(), name="merge_slice_masks") - merge_slice_masks.inputs.dimension = "z" - wf.connect(threshold_mask, "out_file", merge_slice_masks, "in_files") - - wf.connect(merge_slice_masks, "merged_file", output_node, "mask") - - return wf - - -def generate_summarize_tissue_mask( - nuisance_wf, - pipeline_resource_pool, - regressor_descriptor, - regressor_selector, - csf_mask_exist, - use_ants=True, - ventricle_mask_exist=True, - all_bold=False, -): - """ - Add tissue mask generation into pipeline according to the selector. - - :param nuisance_wf: Nuisance regressor workflow. - :param pipeline_resource_pool: dictionary of available resources. - :param regressor_descriptor: dictionary of steps to build, including keys: - 'tissue', 'resolution', 'erosion' - :param regressor_selector: dictionary with the original selector - - :return: the full path of the 3D nifti file containing the mask created by - this operation. - """ - steps = [ - key - for key in ["tissue", "resolution", "erosion"] - if key in regressor_descriptor - ] - - full_mask_key = "_".join(regressor_descriptor[s] for s in steps) - - for step_i, step in enumerate(steps): - mask_key = "_".join(regressor_descriptor[s] for s in steps[: step_i + 1]) - - if mask_key in pipeline_resource_pool: - continue - - node_mask_key = re.sub(r"[^\w]", "_", mask_key) - - prev_mask_key = "_".join(regressor_descriptor[s] for s in steps[:step_i]) - - if step == "tissue": - pass - - elif step == "resolution": - if all_bold: - pass - - if csf_mask_exist: - mask_to_epi = pe.Node( - interface=fsl.FLIRT(), - name="{}_flirt".format(node_mask_key), - mem_gb=3.63, - mem_x=(3767129957844731 / 1208925819614629174706176, "in_file"), - ) - - mask_to_epi.inputs.interp = "nearestneighbour" - - if regressor_selector["extraction_resolution"] == "Functional": - # apply anat2func matrix - mask_to_epi.inputs.apply_xfm = True - mask_to_epi.inputs.output_type = "NIFTI_GZ" - nuisance_wf.connect( - *( - pipeline_resource_pool["Functional_mean"] - + (mask_to_epi, "reference") - ) - ) - nuisance_wf.connect( - *( - pipeline_resource_pool["Transformations"][ - "anat_to_func_linear_xfm" - ] - + (mask_to_epi, "in_matrix_file") - ) - ) - - else: - resolution = regressor_selector["extraction_resolution"] - mask_to_epi.inputs.apply_isoxfm = resolution - - nuisance_wf.connect( - *( - pipeline_resource_pool["Anatomical_{}mm".format(resolution)] - + (mask_to_epi, "reference") - ) - ) - - nuisance_wf.connect( - *(pipeline_resource_pool[prev_mask_key] + (mask_to_epi, "in_file")) - ) - - pipeline_resource_pool[mask_key] = (mask_to_epi, "out_file") - - if full_mask_key.startswith("CerebrospinalFluid"): - pipeline_resource_pool = ( - generate_summarize_tissue_mask_ventricles_masking( - nuisance_wf, - pipeline_resource_pool, - regressor_descriptor, - regressor_selector, - node_mask_key, - csf_mask_exist, - use_ants, - ventricle_mask_exist, - ) - ) - - elif step == "erosion": - erode_mask_node = pe.Node( - afni.Calc( - args="-b a+i -c a-i -d a+j -e a-j -f a+k -g a-k", - expr="a*(1-amongst(0,b,c,d,e,f,g))", - outputtype="NIFTI_GZ", - 
), - name="{}".format(node_mask_key), - ) - - nuisance_wf.connect( - *( - pipeline_resource_pool[prev_mask_key] - + (erode_mask_node, "in_file_a") - ) - ) - - pipeline_resource_pool[mask_key] = (erode_mask_node, "out_file") - - return pipeline_resource_pool, full_mask_key - - -def generate_summarize_tissue_mask_ventricles_masking( - nuisance_wf, - pipeline_resource_pool, - regressor_descriptor, - regressor_selector, - mask_key, - csf_mask_exist, - use_ants=True, - ventricle_mask_exist=True, -): - if csf_mask_exist is False: - logger.warning( - "Segmentation is Off, - therefore will be using " - "lateral_ventricle_mask as CerebrospinalFluid_mask." - ) - - # Mask CSF with Ventricles - if "{}_Unmasked".format(mask_key) not in pipeline_resource_pool: - if ventricle_mask_exist: - ventricles_key = "VentriclesToAnat" - - if "resolution" in regressor_descriptor: - ventricles_key += "_{}".format(regressor_descriptor["resolution"]) - - if ventricles_key not in pipeline_resource_pool: - transforms = pipeline_resource_pool["Transformations"] - - if use_ants is True: - # perform the transform using ANTS - collect_linear_transforms = pe.Node( - util.Merge(3), name="{}_ants_transforms".format(ventricles_key) - ) - - nuisance_wf.connect( - *( - transforms["mni_to_anat_linear_xfm"] - + (collect_linear_transforms, "in1") - ) - ) - - # generate inverse transform flags, which depends on the number of transforms - inverse_transform_flags = pe.Node( - util.Function( - input_names=["transform_list"], - output_names=["inverse_transform_flags"], - function=generate_inverse_transform_flags, - ), - name="{0}_inverse_transform_flags".format(ventricles_key), - ) - nuisance_wf.connect( - collect_linear_transforms, - "out", - inverse_transform_flags, - "transform_list", - ) - - lat_ven_mni_to_anat = pe.Node( - interface=ants.ApplyTransforms(), - name="{}_ants".format(ventricles_key), - mem_gb=0.683, - mem_x=( - 3811976743057169 / 302231454903657293676544, - "input_image", - ), - ) - lat_ven_mni_to_anat.inputs.interpolation = "NearestNeighbor" - lat_ven_mni_to_anat.inputs.dimension = 3 - - nuisance_wf.connect( - inverse_transform_flags, - "inverse_transform_flags", - lat_ven_mni_to_anat, - "invert_transform_flags", - ) - nuisance_wf.connect( - collect_linear_transforms, - "out", - lat_ven_mni_to_anat, - "transforms", - ) - - nuisance_wf.connect( - *( - pipeline_resource_pool["Ventricles"] - + (lat_ven_mni_to_anat, "input_image") - ) - ) - resolution = regressor_selector["extraction_resolution"] - - if csf_mask_exist: - nuisance_wf.connect( - *( - pipeline_resource_pool[mask_key] - + (lat_ven_mni_to_anat, "reference_image") - ) - ) - elif resolution == "Functional": - nuisance_wf.connect( - *( - pipeline_resource_pool["Functional_mean"] - + (lat_ven_mni_to_anat, "reference_image") - ) - ) - else: - nuisance_wf.connect( - *( - pipeline_resource_pool[ - "Anatomical_{}mm".format(resolution) - ] - + (lat_ven_mni_to_anat, "reference_image") - ) - ) - - pipeline_resource_pool[ventricles_key] = ( - lat_ven_mni_to_anat, - "output_image", - ) - - else: - # perform the transform using FLIRT - lat_ven_mni_to_anat = pe.Node( - interface=fsl.ApplyWarp(), - name="{}_fsl_applywarp".format(ventricles_key), - ) - lat_ven_mni_to_anat.inputs.interp = "nn" - - nuisance_wf.connect( - *( - transforms["mni_to_anat_linear_xfm"] - + (lat_ven_mni_to_anat, "field_file") - ) - ) - nuisance_wf.connect( - *( - pipeline_resource_pool["Ventricles"] - + (lat_ven_mni_to_anat, "in_file") - ) - ) - nuisance_wf.connect( - *( - 
pipeline_resource_pool[mask_key] - + (lat_ven_mni_to_anat, "ref_file") - ) - ) - - pipeline_resource_pool[ventricles_key] = ( - lat_ven_mni_to_anat, - "out_file", - ) - - if csf_mask_exist: - # reduce CSF mask to the lateral ventricles - mask_csf_with_lat_ven = pe.Node( - interface=afni.Calc(outputtype="NIFTI_GZ"), - name="{}_Ventricles".format(mask_key), - ) - mask_csf_with_lat_ven.inputs.expr = "a*b" - mask_csf_with_lat_ven.inputs.out_file = "csf_lat_ven_mask.nii.gz" - - nuisance_wf.connect( - *( - pipeline_resource_pool[ventricles_key] - + (mask_csf_with_lat_ven, "in_file_a") - ) - ) - nuisance_wf.connect( - *( - pipeline_resource_pool[mask_key] - + (mask_csf_with_lat_ven, "in_file_b") - ) - ) - - pipeline_resource_pool[ - "{}_Unmasked".format(mask_key) - ] = pipeline_resource_pool[mask_key] - pipeline_resource_pool[mask_key] = (mask_csf_with_lat_ven, "out_file") - - else: - pipeline_resource_pool[mask_key] = pipeline_resource_pool[ - ventricles_key - ] - - return pipeline_resource_pool - return None - - -class NuisanceRegressor(object): - def __init__(self, selector): - self.selector = selector - - if "Bandpass" in self.selector: - s = self.selector["Bandpass"] - if type(s) is not dict or ( - not s.get("bottom_frequency") and not s.get("top_frequency") - ): - del self.selector["Bandpass"] - - def get(self, key, default=None): - return self.selector.get(key, default) - - def __contains__(self, key): - return key in self.selector - - def __getitem__(self, key): - return self.selector[key] - - @staticmethod - def _derivative_params(selector): - nr_repr = "" - if not selector: - return nr_repr - if selector.get("include_squared"): - nr_repr += "S" - if selector.get("include_delayed"): - nr_repr += "D" - if selector.get("include_delayed_squared"): - nr_repr += "B" - if selector.get("include_backdiff"): - nr_repr += "V" - if selector.get("include_backdiff_squared"): - nr_repr += "C" - return nr_repr - - @staticmethod - def _summary_params(selector): - summ = selector["summary"] - - methods = { - "PC": "PC", - "DetrendPC": "DPC", - "Mean": "M", - "NormMean": "NM", - "DetrendMean": "DM", - "DetrendNormMean": "DNM", - } - - if type(summ) == dict: - method = summ["method"] - rep = methods[method] - if method in ["DetrendPC", "PC"]: - rep += "%d" % summ["components"] - else: - rep = methods[summ] - - return rep - - @staticmethod - def encode(selector): - regs = OrderedDict( - [ - ("GreyMatter", "GM"), - ("WhiteMatter", "WM"), - ("CerebrospinalFluid", "CSF"), - ("tCompCor", "tC"), - ("aCompCor", "aC"), - ("GlobalSignal", "G"), - ("Motion", "M"), - ("Custom", "T"), - ("PolyOrt", "P"), - ("Bandpass", "BP"), - ("Censor", "C"), - ] - ) - - tissues = ["GreyMatter", "WhiteMatter", "CerebrospinalFluid"] - - selectors_representations = [] - - # tC-1.5PT-PC5S-SDB - # aC-WC-2mmE-PC5-SDB - - # WM-2mmE-PC5-SDB - # CSF-2mmE-M-SDB - # GM-2mmE-DNM-SDB - - # G-PC5-SDB - # M-SDB - # C-S-FD1.5SD-D1.5SD - # P-2 - # B-T0.01-B0.1 - - for r in regs.keys(): - if r not in selector: - continue - - s = selector[r] - - pieces = [regs[r]] - - if r in tissues: - if ( - s.get("extraction_resolution") - and s["extraction_resolution"] != "Functional" - ): - res = "%.2gmm" % s["extraction_resolution"] - if s.get("erode_mask"): - res += "E" - pieces += [res] - - pieces += [NuisanceRegressor._summary_params(s)] - pieces += [NuisanceRegressor._derivative_params(s)] - - elif r == "tCompCor": - threshold = "" - if s.get("by_slice"): - threshold += "S" - t = s.get("threshold") - if t: - if type(t) != str: - t = "%.2f" % t - threshold 
+= t - if s.get("erode_mask"): - threshold += "E" - if s.get("degree"): - d = s.get("degree") - threshold += str(d) - - pieces += [threshold] - pieces += [NuisanceRegressor._summary_params(s)] - pieces += [NuisanceRegressor._derivative_params(s)] - - elif r == "aCompCor": - if s.get("tissues"): - pieces += ["+".join([regs[t] for t in sorted(s["tissues"])])] - - if s.get("extraction_resolution"): - res = "%.2gmm" % s["extraction_resolution"] - if s.get("erode_mask"): - res += "E" - pieces += [res] - - pieces += [NuisanceRegressor._summary_params(s)] - pieces += [NuisanceRegressor._derivative_params(s)] - - elif r == "Custom": - for ss in s: - pieces += [ - os.path.basename(ss["file"])[0:5] + crc_encode(ss["file"]) - ] - - elif r == "GlobalSignal": - pieces += [NuisanceRegressor._summary_params(s)] - pieces += [NuisanceRegressor._derivative_params(s)] - - elif r == "Motion": - pieces += [NuisanceRegressor._derivative_params(s)] - - elif r == "PolyOrt": - pieces += ["%d" % s["degree"]] - - elif r == "Bandpass": - if s.get("bottom_frequency"): - pieces += ["B%.2g" % s["bottom_frequency"]] - if s.get("top_frequency"): - pieces += ["T%.2g" % s["top_frequency"]] - - elif r == "Censor": - censoring = { - "Kill": "K", - "Zero": "Z", - "Interpolate": "I", - "SpikeRegression": "S", - } - - thresholds = { - "FD_J": "FD-J", - "FD_P": "FD-P", - "DVARS": "DV", - } - - pieces += [censoring[s["method"]]] - - trs_range = ["0", "0"] - if s.get("number_of_previous_trs_to_censor"): - trs_range[0] = "%d" % s["number_of_previous_trs_to_censor"] - if s.get("number_of_subsequent_trs_to_censor"): - trs_range[1] = "%d" % s["number_of_subsequent_trs_to_censor"] - - pieces += ["+".join(trs_range)] - - threshs = sorted(s["thresholds"], reverse=True, key=lambda d: d["type"]) - for st in threshs: - thresh = thresholds[st["type"]] - if type(st["value"]) == str: - thresh += st["value"] - else: - thresh += "%.2g" % st["value"] - - pieces += [thresh] - - selectors_representations += ["-".join([_f for _f in pieces if _f])] - - return "_".join(selectors_representations) - - def __repr__(self): - return NuisanceRegressor.encode(self.selector) +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Utilities for nuisance regression.""" +from . import compcor +from .compcor import calc_compcor_components +from .utils import ( + find_offending_time_points, + generate_summarize_tissue_mask, + NuisanceRegressor, + temporal_variance_mask, +) + +__all__ = [ + "calc_compcor_components", + "compcor", + "find_offending_time_points", + "generate_summarize_tissue_mask", + "NuisanceRegressor", + "temporal_variance_mask", +] diff --git a/CPAC/nuisance/utils/compcor.py b/CPAC/nuisance/utils/compcor.py index c6093c49cc..54de22daaa 100644 --- a/CPAC/nuisance/utils/compcor.py +++ b/CPAC/nuisance/utils/compcor.py @@ -1,40 +1,54 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. 
+ +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import numpy as np import nibabel as nib -from nipype import logging +from nibabel.filebasedimages import ImageFileError from scipy import signal from scipy.linalg import svd from CPAC.utils import safe_shape - -iflogger = logging.getLogger("nipype.interface") +from CPAC.utils.monitoring import IFLOGGER def calc_compcor_components(data_filename, num_components, mask_filename): if num_components < 1: - raise ValueError( - "Improper value for num_components ({0}), should be >= 1.".format( - num_components - ) - ) + msg = f"Improper value for num_components ({num_components}), should be >= 1." + raise ValueError(msg) try: image_data = nib.load(data_filename).get_fdata().astype(np.float64) - except: - raise + except (ImageFileError, MemoryError, OSError, TypeError, ValueError) as e: + msg = f"Unable to load data from {data_filename}" + raise ImageFileError(msg) from e try: binary_mask = nib.load(mask_filename).get_fdata().astype(np.int16) - except: - pass + except (ImageFileError, MemoryError, OSError, TypeError, ValueError) as e: + msg = f"Unable to load data from {mask_filename}" + raise ImageFileError(msg) from e if not safe_shape(image_data, binary_mask): - raise ValueError( - "The data in {0} and {1} do not have a consistent shape".format( - data_filename, mask_filename - ) + msg = ( + f"The data in {data_filename} and {mask_filename} do not have a" + " consistent shape" ) + raise ValueError(msg) # make sure that the values in binary_mask are binary binary_mask[binary_mask > 0] = 1 @@ -44,6 +58,7 @@ def calc_compcor_components(data_filename, num_components, mask_filename): image_data = image_data[binary_mask == 1, :] # filter out any voxels whose variance equals 0 + IFLOGGER.info("Removing zero variance components") image_data = image_data[image_data.std(1) != 0, :] if image_data.shape.count(0): @@ -53,10 +68,11 @@ def calc_compcor_components(data_filename, num_components, mask_filename): ) raise Exception(err) + IFLOGGER.info("Detrending and centering data") Y = signal.detrend(image_data, axis=1, type="linear").T Yc = Y - np.tile(Y.mean(0), (Y.shape[0], 1)) Yc = Yc / np.tile(np.array(Yc.std(0)).reshape(1, Yc.shape[1]), (Yc.shape[0], 1)) - + IFLOGGER.info("Calculating SVD decomposition of Y*Y'") U, S, Vh = np.linalg.svd(Yc, full_matrices=False) # write out the resulting regressor file @@ -179,7 +195,7 @@ def _full_rank(X, cmax=1e15): c = smax / smin if c < cmax: return X, c - iflogger.warning("Matrix is singular at working precision, regularizing...") + IFLOGGER.warning("Matrix is singular at working precision, regularizing...") lda = (smax - cmax * smin) / (cmax - 1) s = s + lda X = np.dot(U, np.dot(np.diag(s), V)) @@ -212,7 +228,8 @@ def TR_string_to_float(tr): tr in seconds (float) """ if not isinstance(tr, str): - raise TypeError(f"Improper type for TR_string_to_float ({tr}).") + msg = f"Improper type for TR_string_to_float ({tr})." 
+ raise TypeError(msg) tr_str = tr.replace(" ", "") @@ -224,6 +241,7 @@ def TR_string_to_float(tr): else: tr_numeric = float(tr_str) except Exception as exc: - raise ValueError(f'Can not convert TR string to float: "{tr}".') from exc + msg = f'Can not convert TR string to float: "{tr}".' + raise ValueError(msg) from exc return tr_numeric diff --git a/CPAC/nuisance/utils/utils.py b/CPAC/nuisance/utils/utils.py new file mode 100644 index 0000000000..7cf1b260d7 --- /dev/null +++ b/CPAC/nuisance/utils/utils.py @@ -0,0 +1,860 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""General utilities for nuisance regression.""" +from collections import OrderedDict +import os +import re +from typing import Optional + +from nipype.interfaces import afni, ants, fsl +import nipype.interfaces.utility as util +from nipype.pipeline.engine import Workflow + +from CPAC.nuisance.utils.crc import encode as crc_encode +from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.registration.utils import generate_inverse_transform_flags +from CPAC.utils.interfaces.fsl import Merge as fslMerge +from CPAC.utils.interfaces.function import Function +from CPAC.utils.monitoring import IFLOGGER + + +def find_offending_time_points( + fd_j_file_path=None, + fd_p_file_path=None, + dvars_file_path=None, + fd_j_threshold=None, + fd_p_threshold=None, + dvars_threshold=None, + number_of_previous_trs_to_censor=0, + number_of_subsequent_trs_to_censor=0, +): + """ + Find time points whose FD and/or DVARS are > threshold. + + :param fd_j_file_path: path to TSV containing framewise displacement as a + single column. If not specified, it will not be used. + :param fd_p_file_path: path to TSV containing framewise displacement as a + single column. If not specified, it will not be used. + :param dvars_file_path: path to TSV containing DVARS as a single column. + If not specified, it will not be used. + :param fd_j_threshold: threshold to apply to framewise displacement (Jenkinson), + it can be a value such as 0.2 or a floating point multiple of the + standard deviation specified as, e.g. '1.5SD'. + :param fd_p_threshold: threshold to apply to framewise displacement (Power), + it can be a value such as 0.2 or a floating point multiple of the + standard deviation specified as, e.g. '1.5SD'. + :param dvars_threshold: threshold to apply to DVARS, can be a value such + as 0.5 or a floating point multiple of the standard deviation specified + as, e.g. '1.5SD'. + :param number_of_previous_trs_to_censor: extent of censorship window before + the censor. + :param number_of_subsequent_trs_to_censor: extent of censorship window after + the censor. + + :return: File path to TSV file containing the volumes to be censored. 
+ """ + import os + import re + + import numpy as np + + offending_time_points = set() + time_course_len = 0 + + motion_measures = ["FDJ", "FDP", "DVARS"] + file_paths = [fd_j_file_path, fd_p_file_path, dvars_file_path] + thresholds = [fd_j_threshold, fd_p_threshold, dvars_threshold] + + for motion_measure, file_path, _threshold in zip( + motion_measures, file_paths, thresholds + ): + threshold = _threshold + if not file_path: + continue + + if not os.path.isfile(file_path): + msg = f"File {file_path} could not be found." + raise ValueError(msg) + + if not threshold: + msg = "Method requires the specification of a threshold, none received" + raise ValueError(msg) + + metric = np.loadtxt(file_path) + if motion_measure == "DVARS": + metric = np.array([0.0, *metric.tolist()]) + + if not time_course_len: + time_course_len = metric.shape[0] + else: + assert ( + time_course_len == metric.shape[0] + ), "Threshold metric files does not have same size." + + try: + threshold_sd = re.match(r"([0-9]*\.*[0-9]*)\s*SD", str(threshold)) + + if threshold_sd: + threshold_sd = float(threshold_sd.groups()[0]) + threshold = metric.mean() + threshold_sd * metric.std() + else: + threshold = float(threshold) + except (AttributeError, re.error, IndexError, TypeError, ValueError): + msg = f"Could not translate threshold {threshold} into a meaningful value" + raise ValueError(msg) + + offending_time_points |= set(np.where(metric > threshold)[0].tolist()) + + extended_censors = [] + for censor in offending_time_points: + extended_censors += list( + range( + (censor - number_of_previous_trs_to_censor), + (censor + number_of_subsequent_trs_to_censor + 1), + ) + ) + + extended_censors = [ + censor + for censor in np.unique(extended_censors) + if 0 <= censor < time_course_len + ] + + censor_vector = np.ones((time_course_len, 1)) + censor_vector[extended_censors] = 0 + + out_file_path = os.path.join(os.getcwd(), "censors.tsv") + np.savetxt(out_file_path, censor_vector, fmt="%d", header="censor", comments="") + + return out_file_path + + +def compute_threshold(in_file, mask, threshold): + """Return a given threshold.""" + return threshold + + +def compute_pct_threshold(in_file, mask, threshold_pct): + """Compute the threshold based on the percentile of the data.""" + import numpy as np + import nibabel as nib + + m = nib.load(mask).get_fdata().astype(bool) + if not np.any(m): + return 0.0 + d = nib.load(in_file).get_fdata()[m] + return np.percentile(d, 100.0 - threshold_pct) + + +def compute_sd_threshold(in_file, mask, threshold_sd): + """Compute the threshold based on the mean and standard deviation of the data.""" + import numpy as np + import nibabel as nib + + m = nib.load(mask).get_fdata().astype(bool) + if not np.any(m): + return 0.0 + d = nib.load(in_file).get_fdata()[m] + return d.mean() + threshold_sd * d.std() + + +def temporal_variance_mask( + threshold, by_slice=False, erosion=False, degree=1 +) -> Workflow: + """Create a mask based on the temporal variance of the data.""" + threshold_method = "VAR" + + if isinstance(threshold, str): + regex_match = { + "SD": r"([0-9]+(\.[0-9]+)?)\s*SD", + "PCT": r"([0-9]+(\.[0-9]+)?)\s*PCT", + } + + for method, regex in regex_match.items(): + matched = re.match(regex, threshold) + if matched: + threshold_method = method + threshold_value = matched.groups()[0] + + try: + threshold_value = float(threshold_value) + except (TypeError, ValueError): + msg = ( + f"Error converting threshold value {threshold_value} from {threshold} to a " + "floating point number. 
The threshold value can " + "contain SD or PCT for selecting a threshold based on " + "the variance distribution, otherwise it should be a " + "floating point number." + ) + raise ValueError(msg) + + if threshold_value < 0: + msg = f"Threshold value should be positive, instead of {threshold_value}." + raise ValueError(msg) + + if threshold_method == "PCT" and threshold_value >= 100.0: # noqa: PLR2004 + msg = f"Percentile should be less than 100, received {threshold_value}." + raise ValueError(msg) + + threshold = threshold_value + + wf = pe.Workflow(name="tcompcor") + + input_node = pe.Node( + util.IdentityInterface(fields=["functional_file_path", "mask_file_path"]), + name="inputspec", + ) + output_node = pe.Node(util.IdentityInterface(fields=["mask"]), name="outputspec") + + # C-PAC default performs linear regression while nipype performs quadratic regression + detrend = pe.Node( + afni.Detrend(args=f"-polort {degree}", outputtype="NIFTI"), + name="detrend", + ) + wf.connect(input_node, "functional_file_path", detrend, "in_file") + + std = pe.Node(afni.TStat(args="-nzstdev", outputtype="NIFTI"), name="std") + wf.connect(input_node, "mask_file_path", std, "mask") + wf.connect(detrend, "out_file", std, "in_file") + + var = pe.Node(afni.Calc(expr="a*a", outputtype="NIFTI"), name="var") + wf.connect(std, "out_file", var, "in_file_a") + + if by_slice: + slices = pe.Node(fsl.Slice(), name="slicer") + wf.connect(var, "out_file", slices, "in_file") + + mask_slices = pe.Node(fsl.Slice(), name="mask_slicer") + wf.connect(input_node, "mask_file_path", mask_slices, "in_file") + + mapper = pe.MapNode( + util.IdentityInterface(fields=["out_file", "mask_file"]), + name="slice_mapper", + iterfield=["out_file", "mask_file"], + ) + wf.connect(slices, "out_files", mapper, "out_file") + wf.connect(mask_slices, "out_files", mapper, "mask_file") + + else: + mapper_list = pe.Node(util.Merge(1), name="slice_mapper_list") + wf.connect(var, "out_file", mapper_list, "in1") + + mask_mapper_list = pe.Node(util.Merge(1), name="slice_mask_mapper_list") + wf.connect(input_node, "mask_file_path", mask_mapper_list, "in1") + + mapper = pe.Node( + util.IdentityInterface(fields=["out_file", "mask_file"]), + name="slice_mapper", + ) + wf.connect(mapper_list, "out", mapper, "out_file") + wf.connect(mask_mapper_list, "out", mapper, "mask_file") + + if threshold_method == "PCT": + threshold_node = pe.MapNode( + Function( + input_names=["in_file", "mask", "threshold_pct"], + output_names=["threshold"], + function=compute_pct_threshold, + as_module=True, + ), + name="threshold_value", + iterfield=["in_file", "mask"], + ) + threshold_node.inputs.threshold_pct = threshold_value + wf.connect(mapper, "out_file", threshold_node, "in_file") + wf.connect(mapper, "mask_file", threshold_node, "mask") + + elif threshold_method == "SD": + threshold_node = pe.MapNode( + Function( + input_names=["in_file", "mask", "threshold_sd"], + output_names=["threshold"], + function=compute_sd_threshold, + as_module=True, + ), + name="threshold_value", + iterfield=["in_file", "mask"], + ) + threshold_node.inputs.threshold_sd = threshold_value + wf.connect(mapper, "out_file", threshold_node, "in_file") + wf.connect(mapper, "mask_file", threshold_node, "mask") + + else: + threshold_node = pe.MapNode( + Function( + input_names=["in_file", "mask", "threshold"], + output_names=["threshold"], + function=compute_threshold, + as_module=True, + ), + name="threshold_value", + iterfield=["in_file", "mask"], + ) + threshold_node.inputs.threshold = threshold_value + 
wf.connect(mapper, "out_file", threshold_node, "in_file") + wf.connect(mapper, "mask_file", threshold_node, "mask") + + threshold_mask = pe.MapNode( + interface=fsl.maths.Threshold(), + name="threshold", + iterfield=["in_file", "thresh"], + ) + threshold_mask.inputs.args = "-bin" + wf.connect(mapper, "out_file", threshold_mask, "in_file") + wf.connect(threshold_node, "threshold", threshold_mask, "thresh") + + merge_slice_masks = pe.Node(interface=fslMerge(), name="merge_slice_masks") + merge_slice_masks.inputs.dimension = "z" + wf.connect(threshold_mask, "out_file", merge_slice_masks, "in_files") + + wf.connect(merge_slice_masks, "merged_file", output_node, "mask") + + return wf + + +def generate_summarize_tissue_mask( + nuisance_wf, + pipeline_resource_pool, + regressor_descriptor, + regressor_selector, + csf_mask_exist, + use_ants=True, + ventricle_mask_exist=True, + all_bold=False, +): + """ + Add tissue mask generation into pipeline according to the selector. + + :param nuisance_wf: Nuisance regressor workflow. + :param pipeline_resource_pool: dictionary of available resources. + :param regressor_descriptor: dictionary of steps to build, including keys: + 'tissue', 'resolution', 'erosion' + :param regressor_selector: dictionary with the original selector + + :return: the full path of the 3D nifti file containing the mask created by + this operation. + """ + steps = [ + key + for key in ["tissue", "resolution", "erosion"] + if key in regressor_descriptor + ] + + full_mask_key = "_".join(regressor_descriptor[s] for s in steps) + + for step_i, step in enumerate(steps): + mask_key = "_".join(regressor_descriptor[s] for s in steps[: step_i + 1]) + + if mask_key in pipeline_resource_pool: + continue + + node_mask_key = re.sub(r"[^\w]", "_", mask_key) + + prev_mask_key = "_".join(regressor_descriptor[s] for s in steps[:step_i]) + + if step == "tissue": + pass + + elif step == "resolution": + if all_bold: + pass + + if csf_mask_exist: + mask_to_epi = pe.Node( + interface=fsl.FLIRT(), + name=f"{node_mask_key}_flirt", + mem_gb=3.63, + mem_x=(3767129957844731 / 1208925819614629174706176, "in_file"), + ) + + mask_to_epi.inputs.interp = "nearestneighbour" + + if regressor_selector["extraction_resolution"] == "Functional": + # apply anat2func matrix + mask_to_epi.inputs.apply_xfm = True + mask_to_epi.inputs.output_type = "NIFTI_GZ" + nuisance_wf.connect( + *( + pipeline_resource_pool["Functional_mean"] + + (mask_to_epi, "reference") + ) + ) + nuisance_wf.connect( + *( + pipeline_resource_pool["Transformations"][ + "anat_to_func_linear_xfm" + ] + + (mask_to_epi, "in_matrix_file") + ) + ) + + else: + resolution = regressor_selector["extraction_resolution"] + mask_to_epi.inputs.apply_isoxfm = resolution + + nuisance_wf.connect( + *( + pipeline_resource_pool[f"Anatomical_{resolution}mm"] + + (mask_to_epi, "reference") + ) + ) + + nuisance_wf.connect( + *(pipeline_resource_pool[prev_mask_key] + (mask_to_epi, "in_file")) + ) + + pipeline_resource_pool[mask_key] = (mask_to_epi, "out_file") + + if full_mask_key.startswith("CerebrospinalFluid"): + pipeline_resource_pool = ( + generate_summarize_tissue_mask_ventricles_masking( + nuisance_wf, + pipeline_resource_pool, + regressor_descriptor, + regressor_selector, + node_mask_key, + csf_mask_exist, + use_ants, + ventricle_mask_exist, + ) + ) + + elif step == "erosion": + erode_mask_node = pe.Node( + afni.Calc( + args="-b a+i -c a-i -d a+j -e a-j -f a+k -g a-k", + expr="a*(1-amongst(0,b,c,d,e,f,g))", + outputtype="NIFTI_GZ", + ), + name=f"{node_mask_key}", + ) + 
+ nuisance_wf.connect( + *( + pipeline_resource_pool[prev_mask_key] + + (erode_mask_node, "in_file_a") + ) + ) + + pipeline_resource_pool[mask_key] = (erode_mask_node, "out_file") + + return pipeline_resource_pool, full_mask_key + + +def generate_summarize_tissue_mask_ventricles_masking( + nuisance_wf, + pipeline_resource_pool: dict, + regressor_descriptor, + regressor_selector, + mask_key, + csf_mask_exist, + use_ants=True, + ventricle_mask_exist=True, +) -> Optional[dict]: + """Update CSF mask to include only the lateral ventricles.""" + if not csf_mask_exist: + IFLOGGER.warning( + "Segmentation is Off, - therefore will be using " + "lateral_ventricle_mask as CerebrospinalFluid_mask." + ) + + # Mask CSF with Ventricles + if f"{mask_key}_Unmasked" not in pipeline_resource_pool: + if ventricle_mask_exist: + ventricles_key = "VentriclesToAnat" + + if "resolution" in regressor_descriptor: + ventricles_key += "_{}".format(regressor_descriptor["resolution"]) + + if ventricles_key not in pipeline_resource_pool: + transforms = pipeline_resource_pool["Transformations"] + + if use_ants: + # perform the transform using ANTS + collect_linear_transforms = pe.Node( + util.Merge(3), name=f"{ventricles_key}_ants_transforms" + ) + + nuisance_wf.connect( + *( + transforms["mni_to_anat_linear_xfm"] + + (collect_linear_transforms, "in1") + ) + ) + + # generate inverse transform flags, which depends on the number of transforms + inverse_transform_flags = pe.Node( + util.Function( + input_names=["transform_list"], + output_names=["inverse_transform_flags"], + function=generate_inverse_transform_flags, + ), + name=f"{ventricles_key}_inverse_transform_flags", + ) + nuisance_wf.connect( + collect_linear_transforms, + "out", + inverse_transform_flags, + "transform_list", + ) + + lat_ven_mni_to_anat = pe.Node( + interface=ants.ApplyTransforms(), + name=f"{ventricles_key}_ants", + mem_gb=0.683, + mem_x=( + 3811976743057169 / 302231454903657293676544, + "input_image", + ), + ) + lat_ven_mni_to_anat.inputs.interpolation = "NearestNeighbor" + lat_ven_mni_to_anat.inputs.dimension = 3 + + nuisance_wf.connect( + inverse_transform_flags, + "inverse_transform_flags", + lat_ven_mni_to_anat, + "invert_transform_flags", + ) + nuisance_wf.connect( + collect_linear_transforms, + "out", + lat_ven_mni_to_anat, + "transforms", + ) + + nuisance_wf.connect( + *( + pipeline_resource_pool["Ventricles"] + + (lat_ven_mni_to_anat, "input_image") + ) + ) + resolution = regressor_selector["extraction_resolution"] + + if csf_mask_exist: + nuisance_wf.connect( + *( + pipeline_resource_pool[mask_key] + + (lat_ven_mni_to_anat, "reference_image") + ) + ) + elif resolution == "Functional": + nuisance_wf.connect( + *( + pipeline_resource_pool["Functional_mean"] + + (lat_ven_mni_to_anat, "reference_image") + ) + ) + else: + nuisance_wf.connect( + *( + pipeline_resource_pool[f"Anatomical_{resolution}mm"] + + (lat_ven_mni_to_anat, "reference_image") + ) + ) + + pipeline_resource_pool[ventricles_key] = ( + lat_ven_mni_to_anat, + "output_image", + ) + + else: + # perform the transform using FLIRT + lat_ven_mni_to_anat = pe.Node( + interface=fsl.ApplyWarp(), + name=f"{ventricles_key}_fsl_applywarp", + ) + lat_ven_mni_to_anat.inputs.interp = "nn" + + nuisance_wf.connect( + *( + transforms["mni_to_anat_linear_xfm"] + + (lat_ven_mni_to_anat, "field_file") + ) + ) + nuisance_wf.connect( + *( + pipeline_resource_pool["Ventricles"] + + (lat_ven_mni_to_anat, "in_file") + ) + ) + nuisance_wf.connect( + *( + pipeline_resource_pool[mask_key] + + 
(lat_ven_mni_to_anat, "ref_file") + ) + ) + + pipeline_resource_pool[ventricles_key] = ( + lat_ven_mni_to_anat, + "out_file", + ) + + if csf_mask_exist: + # reduce CSF mask to the lateral ventricles + mask_csf_with_lat_ven = pe.Node( + interface=afni.Calc(outputtype="NIFTI_GZ"), + name=f"{mask_key}_Ventricles", + ) + mask_csf_with_lat_ven.inputs.expr = "a*b" + mask_csf_with_lat_ven.inputs.out_file = "csf_lat_ven_mask.nii.gz" + + nuisance_wf.connect( + *( + pipeline_resource_pool[ventricles_key] + + (mask_csf_with_lat_ven, "in_file_a") + ) + ) + nuisance_wf.connect( + *( + pipeline_resource_pool[mask_key] + + (mask_csf_with_lat_ven, "in_file_b") + ) + ) + + pipeline_resource_pool[f"{mask_key}_Unmasked"] = pipeline_resource_pool[ + mask_key + ] + pipeline_resource_pool[mask_key] = (mask_csf_with_lat_ven, "out_file") + + else: + pipeline_resource_pool[mask_key] = pipeline_resource_pool[ + ventricles_key + ] + + return pipeline_resource_pool + return None + + +class NuisanceRegressor(object): + """A nuisance regressor.""" + + def __init__(self, selector): + """Initialize the nuisance regressor.""" + self.selector = selector + + if "Bandpass" in self.selector: + s = self.selector["Bandpass"] + if not isinstance(s, dict) or ( + not s.get("bottom_frequency") and not s.get("top_frequency") + ): + del self.selector["Bandpass"] + + def get(self, key, default=None): + """Return the value of the key in the selector.""" + return self.selector.get(key, default) + + def __contains__(self, key): + """Return whether the key is in the selector.""" + return key in self.selector + + def __getitem__(self, key): + """Return the value of the key in the selector.""" + return self.selector[key] + + @staticmethod + def _derivative_params(selector): + nr_repr = "" + if not selector: + return nr_repr + if selector.get("include_squared"): + nr_repr += "S" + if selector.get("include_delayed"): + nr_repr += "D" + if selector.get("include_delayed_squared"): + nr_repr += "B" + if selector.get("include_backdiff"): + nr_repr += "V" + if selector.get("include_backdiff_squared"): + nr_repr += "C" + return nr_repr + + @staticmethod + def _summary_params(selector): + summ = selector["summary"] + + methods = { + "PC": "PC", + "DetrendPC": "DPC", + "Mean": "M", + "NormMean": "NM", + "DetrendMean": "DM", + "DetrendNormMean": "DNM", + } + + if isinstance(summ, dict): + method = summ["method"] + rep = methods[method] + if method in ["DetrendPC", "PC"]: + rep += "%d" % summ["components"] + else: + rep = methods[summ] + + return rep + + @staticmethod + def encode(selector: dict) -> str: + """Return a brief string representation of the nuisance regressor.""" + regs = OrderedDict( + [ + ("GreyMatter", "GM"), + ("WhiteMatter", "WM"), + ("CerebrospinalFluid", "CSF"), + ("tCompCor", "tC"), + ("aCompCor", "aC"), + ("GlobalSignal", "G"), + ("Motion", "M"), + ("Custom", "T"), + ("PolyOrt", "P"), + ("Bandpass", "BP"), + ("Censor", "C"), + ] + ) + + tissues = ["GreyMatter", "WhiteMatter", "CerebrospinalFluid"] + + selectors_representations = [] + + # tC-1.5PT-PC5S-SDB + # aC-WC-2mmE-PC5-SDB + + # WM-2mmE-PC5-SDB + # CSF-2mmE-M-SDB + # GM-2mmE-DNM-SDB + + # G-PC5-SDB + # M-SDB + # C-S-FD1.5SD-D1.5SD + # P-2 + # B-T0.01-B0.1 + + for r in regs.keys(): + if r not in selector: + continue + + s = selector[r] + + pieces = [regs[r]] + + if r in tissues: + if ( + s.get("extraction_resolution") + and s["extraction_resolution"] != "Functional" + ): + res = "%.2gmm" % s["extraction_resolution"] + if s.get("erode_mask"): + res += "E" + pieces += [res] + + 
pieces += [NuisanceRegressor._summary_params(s)] + pieces += [NuisanceRegressor._derivative_params(s)] + + elif r == "tCompCor": + threshold = "" + if s.get("by_slice"): + threshold += "S" + t = s.get("threshold") + if t: + if not isinstance(t, str): + t = "%.2f" % t + threshold += t + if s.get("erode_mask"): + threshold += "E" + if s.get("degree"): + d = s.get("degree") + threshold += str(d) + + pieces += [threshold] + pieces += [NuisanceRegressor._summary_params(s)] + pieces += [NuisanceRegressor._derivative_params(s)] + + elif r == "aCompCor": + if s.get("tissues"): + pieces += ["+".join([regs[t] for t in sorted(s["tissues"])])] + + if s.get("extraction_resolution"): + res = "%.2gmm" % s["extraction_resolution"] + if s.get("erode_mask"): + res += "E" + pieces += [res] + + pieces += [NuisanceRegressor._summary_params(s)] + pieces += [NuisanceRegressor._derivative_params(s)] + + elif r == "Custom": + for ss in s: + pieces += [ + os.path.basename(ss["file"])[0:5] + crc_encode(ss["file"]) + ] + + elif r == "GlobalSignal": + pieces += [NuisanceRegressor._summary_params(s)] + pieces += [NuisanceRegressor._derivative_params(s)] + + elif r == "Motion": + pieces += [NuisanceRegressor._derivative_params(s)] + + elif r == "PolyOrt": + pieces += ["%d" % s["degree"]] + + elif r == "Bandpass": + if s.get("bottom_frequency"): + pieces += ["B%.2g" % s["bottom_frequency"]] + if s.get("top_frequency"): + pieces += ["T%.2g" % s["top_frequency"]] + + elif r == "Censor": + censoring = { + "Kill": "K", + "Zero": "Z", + "Interpolate": "I", + "SpikeRegression": "S", + } + + thresholds = { + "FD_J": "FD-J", + "FD_P": "FD-P", + "DVARS": "DV", + } + + pieces += [censoring[s["method"]]] + + trs_range = ["0", "0"] + if s.get("number_of_previous_trs_to_censor"): + trs_range[0] = "%d" % s["number_of_previous_trs_to_censor"] + if s.get("number_of_subsequent_trs_to_censor"): + trs_range[1] = "%d" % s["number_of_subsequent_trs_to_censor"] + + pieces += ["+".join(trs_range)] + + threshs = sorted(s["thresholds"], reverse=True, key=lambda d: d["type"]) + for st in threshs: + thresh = thresholds[st["type"]] + if isinstance(st["value"], str): + thresh += st["value"] + else: + thresh += "%.2g" % st["value"] + + pieces += [thresh] + + selectors_representations += ["-".join([_f for _f in pieces if _f])] + + return "_".join(selectors_representations) + + def __repr__(self) -> str: + """Return a string representation of the nuisance regressor.""" + return NuisanceRegressor.encode(self.selector) diff --git a/CPAC/pipeline/check_outputs.py b/CPAC/pipeline/check_outputs.py index f7d47c1424..20fb165805 100644 --- a/CPAC/pipeline/check_outputs.py +++ b/CPAC/pipeline/check_outputs.py @@ -25,7 +25,7 @@ from CPAC.utils.bids_utils import with_key, without_key from CPAC.utils.datasource import bidsier_prefix -from CPAC.utils.monitoring.custom_logging import MockLogger, getLogger, set_up_logger +from CPAC.utils.monitoring.custom_logging import getLogger, MockLogger, set_up_logger def check_outputs(output_dir: str, log_dir: str, pipe_name: str, unique_id: str) -> str: @@ -87,8 +87,9 @@ def check_outputs(output_dir: str, log_dir: str, pipe_name: str, unique_id: str) ): missing_outputs += (subdir, filename) except Exception as exception: # pylint: disable=broad-except - logger = getLogger("nipype.workflow") - logger.error(str(exception)) + from CPAC.utils.monitoring import WFLOGGER + + WFLOGGER.error(str(exception)) if missing_outputs: missing_log = set_up_logger( f"missingOutputs_{unique_id}", @@ -169,7 +170,8 @@ class ExpectedOutputs: def 
__init__(self, expected=None): self.expected_outputs = {} if expected is None else expected if not isinstance(self.expected_outputs, dict): - raise TypeError("ExpectedOutputs.expected_outputs must be a dict") + msg = "ExpectedOutputs.expected_outputs must be a dict" + raise TypeError(msg) def __bool__(self): return bool(len(self)) @@ -182,10 +184,11 @@ def __iter__(self): def __iadd__(self, other): if not isinstance(other, tuple) or len(other) != 2: - raise TypeError( + msg = ( f"{self.__module__}.{self.__class__.__name__} requires a " "tuple of ('subdir', 'output') for addition" ) + raise TypeError(msg) self.add(*other) return self diff --git a/CPAC/pipeline/cpac_basc_pipeline.py b/CPAC/pipeline/cpac_basc_pipeline.py index 3a14b09168..3ef5146649 100644 --- a/CPAC/pipeline/cpac_basc_pipeline.py +++ b/CPAC/pipeline/cpac_basc_pipeline.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -21,10 +21,13 @@ from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin from CPAC.utils import Configuration +from CPAC.utils.monitoring import IFLOGGER def prep_basc_workflow(c, subject_infos): + IFLOGGER.info("Preparing BASC workflow") p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos)) + IFLOGGER.info("Subjects %s", s_ids) wf = pe.Workflow(name="basc_workflow") wf.base_dir = c.pipeline_setup["working_directory"]["path"] diff --git a/CPAC/pipeline/cpac_cwas_pipeline.py b/CPAC/pipeline/cpac_cwas_pipeline.py index 368e53576e..cfe56f9012 100644 --- a/CPAC/pipeline/cpac_cwas_pipeline.py +++ b/CPAC/pipeline/cpac_cwas_pipeline.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -21,10 +21,13 @@ from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin from CPAC.utils.configuration import Configuration +from CPAC.utils.monitoring import IFLOGGER def prep_cwas_workflow(c, subject_infos): + IFLOGGER.info("Preparing CWAS workflow") p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos)) + IFLOGGER.info("Subjects %s", s_ids) wf = pe.Workflow(name="cwas_workflow") wf.base_dir = c.pipeline_setup["working_directory"]["path"] diff --git a/CPAC/pipeline/cpac_ga_model_generator.py b/CPAC/pipeline/cpac_ga_model_generator.py index fb9ace888f..d4ebfb8747 100755 --- a/CPAC/pipeline/cpac_ga_model_generator.py +++ b/CPAC/pipeline/cpac_ga_model_generator.py @@ -1,23 +1,24 @@ -"""Copyright (C) 2022 C-PAC Developers. +# Copyright (C) 2022-2024 C-PAC Developers. -This file is part of C-PAC. +# This file is part of C-PAC. -C-PAC is free software: you can redistribute it and/or modify it under -the terms of the GNU Lesser General Public License as published by the -Free Software Foundation, either version 3 of the License, or (at your -option) any later version. +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. -C-PAC is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. 
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. -You should have received a copy of the GNU Lesser General Public -License along with C-PAC. If not, see . -""" +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import re +from CPAC.utils.monitoring import IFLOGGER + def write_new_sub_file(current_mod_path, subject_list, new_participant_list): # write the new participant list @@ -103,7 +104,7 @@ def check_merged_file(list_of_output_files, merged_outfile): test_string = [ "3ddot", "-demean", - "{0}[{1}]".format(merged_outfile, str(i)), + f"{merged_outfile}[{i!s}]", output_file, ] @@ -198,6 +199,14 @@ def check_mask_file_resolution( roi_mask_dims = roi_mask_hdr.get_zooms() if raw_file_dims != roi_mask_dims: + IFLOGGER.warning( + "The custom ROI mask file is a different resolution than the output data!" + " Resampling the ROI mask file to match the original output data!" + "\n\nCustom ROI mask file: %s\n\nOutput measure: %s\n\n", + roi_mask, + output_id, + ) + resampled_outfile = os.path.join( out_dir, "resampled_%s" % os.path.basename(roi_mask) ) @@ -429,12 +438,12 @@ def patsify_design_formula(formula, categorical_list, encoding="Treatment"): # pad end with single space so the formula.replace below won't miss the last # covariate when relevant - formula = "{0} ".format(formula) + formula = f"{formula} " for ev in categorical_list: if ev in formula: new_ev = "C(" + ev + closer - formula = formula.replace(" {0} ".format(ev), new_ev) + formula = formula.replace(f" {ev} ", new_ev) # remove Intercept - if user wants one, they should add "+ Intercept" when # specifying the design formula @@ -455,19 +464,36 @@ def patsify_design_formula(formula, categorical_list, encoding="Treatment"): def check_multicollinearity(matrix): import numpy as np + IFLOGGER.info("\nChecking for multicollinearity in the model..") + U, s, V = np.linalg.svd(matrix) max_singular = np.max(s) min_singular = np.min(s) + IFLOGGER.info( + "Max singular: %s\nMin singular: %s\nRank: %s\n", + max_singular, + min_singular, + np.linalg.matrix_rank(matrix), + ) + if min_singular == 0: - pass + IFLOGGER.warning( + "[!] CPAC warns: Detected multicollinearity in the computed group-level" + " analysis model. Please double-check your model design.\n\n" + ) else: condition_number = float(max_singular) / float(min_singular) + IFLOGGER.info("Condition number: %f", condition_number) if condition_number > 30: - pass + IFLOGGER.warning( + "[!] CPAC warns: Detected multicollinearity in the computed" + " group-level analysis model. Please double-check your model" + " design.\n\n" + ) else: - pass + IFLOGGER.info("Looks good..\n") def create_contrasts_dict(dmatrix_obj, contrasts_list, output_measure): @@ -530,7 +556,7 @@ def build_feat_model( # sublist_txt = group_config_obj.participant_list # if sublist_txt == None: - # print ("Warning! You have not provided a subject list. CPAC will use all the subjects in pipeline directory") + # IFLOGGER.warning("Warning! You have not provided a subject list. 
CPAC will use all the subjects in pipeline directory") # sublist_txt = group_config_obj.participant_list # else: # sublist_txt = group_config_obj.particpant_list @@ -585,8 +611,8 @@ def build_feat_model( group_config_obj.output_dir, "cpac_group_analysis", "FSL_FEAT", - "{0}".format(pipeline_ID), - "group_model_{0}".format(model_name), + f"{pipeline_ID}", + f"group_model_{model_name}", ) out_dir = os.path.join( @@ -829,7 +855,7 @@ def build_feat_model( "to model each group's variances separately) " "either have more than 2 levels (1/0), or are " "not encoded as 1's and 0's.\n\nCovariates:\n" - "{0}\n{1}\n\n".format(group_ev[0], group_ev[1]) + f"{group_ev[0]}\n{group_ev[1]}\n\n" ) raise Exception(err) @@ -850,9 +876,7 @@ def build_feat_model( "to model each group's variances separately) " "either have more than 2 levels (1/0), or are " "not encoded as 1's and 0's.\n\nCovariates:\n" - "{0}\n{1}\n{2}\n\n".format( - group_ev[0], group_ev[1], group_ev[2] - ) + f"{group_ev[0]}\n{group_ev[1]}\n{group_ev[2]}\n\n" ) raise Exception(err) @@ -868,8 +892,8 @@ def build_feat_model( "3. For some reason, the configuration has been set up " "in a way where CPAC currently thinks you're including " "only one group, or more than three, neither of which " - "are supported.\n\nGroups provided:\n{0}" - "\n\n".format(str(group_ev)) + f"are supported.\n\nGroups provided:\n{group_ev!s}" + "\n\n" ) raise Exception(err) @@ -975,35 +999,29 @@ def build_feat_model( # check to make sure there are more time points than EVs! if len(column_names) >= num_subjects: - err = ( - "\n\n################## MODEL NOT GENERATED ##################" - "\n\n[!] CPAC says: There are more EVs than there are " - "participants currently included in the model for:\n\n" - "Derivative: {0}\nSession: {1}\nScan: {2}\nPreproc strategy:" - "\n {3}\n\n" - "There must be more participants than EVs in the design.\n\n" - "Number of participants: {4}\nNumber of EVs: {5}\n\nEV/" - "covariate list: {6}\n\nNote: If you specified to model group " - "variances separately, the amount of EVs can nearly double " - "once they are split along the grouping variable.\n\nIf the " - "number of participants is lower than the number of " - "participants in your group analysis inclusion list, this " - "may be because not every participant originally included has " - "an output for {7} for this scan and preprocessing strategy in " - "the individual-level analysis output directory.\n\nDesign " - "formula going in: {8}" - "\n\n#########################################################" - "\n\n".format( - resource_id, - session_id, - series_or_repeated_label, - preproc_strat, - num_subjects, - len(column_names), - column_names, - resource_id, - design_formula, - ) + IFLOGGER.error( + "\n\n################## MODEL NOT GENERATED ##################\n\n[!] 
CPAC" + " says: There are more EVs than there are participants currently included" + " in the model for:\n\nDerivative: %s\nSession: %s\nScan: %s\nPreproc" + " strategy:\n %s\n\nThere must be more participants than EVs in the" + " design.\n\nNumber of participants: %s\nNumber of EVs: %s\n\nEV/covariate" + " list: %s\n\nNote: If you specified to model group variances separately," + " the amount of EVs can nearly double once they are split along the" + " grouping variable.\n\nIf the number of participants is lower than the" + " number of participants in your group analysis inclusion list, this may" + " be because not every participant originally included has an output for" + " %s for this scan and preprocessing strategy in the individual-level" + " analysis output directory.\n\nDesign formula going in: %s" + "\n\n#########################################################\n\n", + resource_id, + session_id, + series_or_repeated_label, + preproc_strat, + num_subjects, + len(column_names), + column_names, + resource_id, + design_formula, ) # check the merged file's order @@ -1070,7 +1088,7 @@ def build_feat_model( contrasts_columns = column_names if group_config_obj.f_tests: for i in group_config_obj.f_tests[1 : len(group_config_obj.f_tests) - 1]: - contrasts_columns.append("f_test_{0}".format(i)) + contrasts_columns.append(f"f_test_{i}") else: pass @@ -1091,15 +1109,15 @@ def build_feat_model( "\n\n[!] C-PAC says: It appears you have modified your " "contrasts CSV file already- back up this file before " "building your model again to avoid overwriting your " - "changes.\n\nContrasts file:\n{0}" - "\n\n".format(contrast_out_path) + f"changes.\n\nContrasts file:\n{contrast_out_path}" + "\n\n" ) raise Exception(msg) with open(contrast_out_path, "w") as f: f.write("Contrasts") for col in contrasts_columns: - f.write(",{0}".format(col)) + f.write(f",{col}") f.write("\ncontrast_1") for col in contrasts_columns: f.write(",0") @@ -1107,25 +1125,25 @@ def build_feat_model( groups_out_path = os.path.join(model_path, "groups.txt") with open(groups_out_path, "w") as f: for val in grp_vector: - f.write("{0}\n".format(val)) - - msg = ( - "Model successfully generated for..\nDerivative: {0}\nSession: {1}" - "\nScan: {2}\nPreprocessing strategy:\n {3}\n\nModel directory:" - "\n{4}\n\nGroup configuration file:\n{5}\n\nContrasts template CSV:" - "\n{6}\n\nDefine your contrasts in this contrasts template CSV and " - "save your changes, then run FSL-FEAT " - "through the command-line like so:\n\n cpac group " - "feat run " - "\n".format( - resource_id, - session_id, - series_or_repeated_label, - preproc_strat, - model_path, - group_config_file, - contrast_out_path, - ) + f.write(f"{val}\n") + + hrow = "-------------------------------------------------------------------\n" + IFLOGGER.info( + "%sModel successfully generated for..\nDerivative: %s\nSession: %s\nScan: %s" + "\nPreprocessing strategy:\n %s\n\nModel directory:\n%s\n\nGroup" + " configuration file:\n%s\n\nContrasts template CSV:\n%s\n\nDefine your" + " contrasts in this contrasts template CSV and save your changes, then run" + " FSL-FEAT through the command-line like so:\n\n cpac group feat run \n%s", + hrow, + resource_id, + session_id, + series_or_repeated_label, + preproc_strat, + model_path, + group_config_file, + contrast_out_path, + hrow, ) return dmat_csv_path, new_sub_file, contrast_out_path diff --git a/CPAC/pipeline/cpac_group_runner.py b/CPAC/pipeline/cpac_group_runner.py index 7b44a833ee..70328eaccb 100644 --- 
a/CPAC/pipeline/cpac_group_runner.py +++ b/CPAC/pipeline/cpac_group_runner.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -14,63 +14,60 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . +"""Run a group-level analysis with C-PAC.""" import fnmatch import os +from CPAC.utils.monitoring import WFLOGGER +from CPAC.utils.typing import LIST -def load_config_yml(config_file, individual=False): - # loads a configuration YAML file - # - # input - # config_file: full filepath to YAML (.yml) file - # - # output - # config: Configuration object +def load_config_yml(config_file: str) -> dict: + """Load a configuration YAML file. + + Parameters + ---------- + config_file : str + full filepath to YAML (.yml) file + + Returns + ------- + config : dict + """ import os import yaml try: config_path = os.path.realpath(config_file) - config_dict = yaml.safe_load(open(config_path, "r")) - config = config_dict - except Exception as e: + except (OSError, TypeError, yaml.YAMLError) as e: err = ( "\n\n[!] CPAC says: Could not load or read the configuration " - "YAML file:\n%s\nDetails: %s\n\n" % (config_file, e) - ) - raise Exception(err) - - if individual: - config.logDirectory = os.path.abspath( - config["pipeline_setup"]["log_directory"]["path"] - ) - config.workingDirectory = os.path.abspath( - config["pipeline_setup"]["working_directory"]["path"] - ) - config.outputDirectory = os.path.abspath( - config["pipeline_setup"]["output_directory"]["output_path"] - ) - config.crashLogDirectory = os.path.abspath( - config["pipeline_setup"]["crash_log_directory"]["path"] + f"YAML file:\n{config_file}\n" ) + raise yaml.YAMLError(err) from e return config -def load_text_file(filepath, label="file"): - # loads a text file and returns the lines in a list - # - # input - # filepath: full filepath to the text file - # - # output - # lines_list: list of lines from text file +def load_text_file(filepath: str, label: str = "file") -> LIST[str]: + """Load a text file and return the lines in a list. + Parameters + ---------- + filepath : str + full filepath to the text file + + label : str + + Returns + ------- + lines_list : list of str + list of lines from text file + """ if not filepath.endswith(".txt"): err = ( "\n\n[!] CPAC says: The %s should be a text file (.txt).\n" @@ -93,6 +90,7 @@ def load_text_file(filepath, label="file"): def grab_pipeline_dir_subs(pipeline_dir, ses=False): + """Grab the subject IDs from the pipeline output directory.""" import os inclusion_list = [] @@ -124,23 +122,24 @@ def read_pheno_csv_into_df(pheno_csv, id_label=None): pheno_df = pd.read_table(f, dtype={id_label: object}) else: pheno_df = pd.read_csv(f, dtype={id_label: object}) + elif ".tsv" in pheno_csv or ".TSV" in pheno_csv: + pheno_df = pd.read_table(f) else: - if ".tsv" in pheno_csv or ".TSV" in pheno_csv: - pheno_df = pd.read_table(f) - else: - pheno_df = pd.read_csv(f) + pheno_df = pd.read_csv(f) return pheno_df def gather_nifti_globs(pipeline_output_folder, resource_list, pull_func=False): - # the number of directory levels under each participant's output folder - # can vary depending on what preprocessing strategies were chosen, and - # there may be several output filepaths with varying numbers of directory - # levels + """Gather the NIFTI file globs for the derivatives selected. 
- # this parses them quickly while also catching each preprocessing strategy + The number of directory levels under each participant's output folder + can vary depending on what preprocessing strategies were chosen, and + there may be several output filepaths with varying numbers of directory + levels. + This parses them quickly while also catching each preprocessing strategy. + """ import glob import os @@ -156,7 +155,7 @@ def gather_nifti_globs(pipeline_output_folder, resource_list, pull_func=False): except Exception as e: err = ( "\n[!] Could not access or read the cpac_outputs.tsv " - "resource file:\n{0}\n\nError details {1}\n".format(keys_tsv, e) + f"resource file:\n{keys_tsv}\n\nError details {e}\n" ) raise Exception(err) @@ -177,6 +176,10 @@ def gather_nifti_globs(pipeline_output_folder, resource_list, pull_func=False): # remove any extra /'s pipeline_output_folder = pipeline_output_folder.rstrip("/") + WFLOGGER.info( + "\n\nGathering the output file paths from %s...", pipeline_output_folder + ) + # this is just to keep the fsl feat config file derivative_list entries # nice and lean dirs_to_grab = [] @@ -196,7 +199,7 @@ def gather_nifti_globs(pipeline_output_folder, resource_list, pull_func=False): # ends with a NIFTI file prog_string = ".." while len(glob.glob(glob_string)) != 0: - if any(exts in x for x in glob.glob(glob_string)) is True: + if any(exts in x for x in glob.glob(glob_string)): nifti_globs.append(glob_string) glob_string = os.path.join(glob_string, "*") @@ -215,6 +218,7 @@ def gather_nifti_globs(pipeline_output_folder, resource_list, pull_func=False): def grab_raw_score_filepath(filepath, resource_id): + """Grab the filepath for the raw score of the resource.""" # this lives in the output path collector import glob @@ -250,7 +254,7 @@ def grab_raw_score_filepath(filepath, resource_id): raw_score_path = raw_score_path.replace(raw_score_path.split("/")[-1], "") try: raw_score_path = glob.glob(os.path.join(raw_score_path, "*"))[0] - except: + except (FileNotFoundError, IndexError, TypeError): raw_score_path = os.path.join(raw_score_path, "*") if (raw_score_path is None) or (not os.path.exists(raw_score_path)): @@ -266,6 +270,7 @@ def grab_raw_score_filepath(filepath, resource_id): def find_power_params_file(filepath, resource_id, series_id): + """Find the power parameters file for the participant and series.""" import os try: @@ -302,13 +307,14 @@ def find_power_params_file(filepath, resource_id, series_id): def extract_power_params(power_params_lines, power_params_filepath): + """Extract the power parameters from the power parameters file.""" # check formatting - if len(power_params_lines) != 2: + if len(power_params_lines) != 2: # noqa: PLR2004 err = ( "\n\n[!] There is something wrong with the formatting of the " - "power parameters file.\nFilepath: %s\n\n" % power_params_filepath + f"power parameters file.\nFilepath: {power_params_filepath}\n\n" ) - raise Exception(err) + raise ValueError(err) names_list = power_params_lines[0].split(",") values_list = power_params_lines[1].split(",") @@ -352,6 +358,7 @@ def create_output_dict_list( derivatives=None, exts=["nii", "nii.gz"], ): + """Create a dictionary of output filepaths and their associated information.""" import os import pandas as pd @@ -368,7 +375,7 @@ def create_output_dict_list( except Exception as e: err = ( "\n[!] 
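The glob-deepening loop in gather_nifti_globs above is easier to see in isolation; a minimal sketch, assuming a hypothetical output layout (this is not C-PAC's API):

    import glob
    import os

    pattern = os.path.join("/outputs/pipeline_example", "sub-*", "desc-preproc_bold")  # hypothetical layout
    nifti_globs = []
    while glob.glob(pattern):
        # keep this depth if any match at this level looks like a NIFTI file
        if any(ext in match for match in glob.glob(pattern) for ext in ("nii", "nii.gz")):
            nifti_globs.append(pattern)
        pattern = os.path.join(pattern, "*")  # descend one more directory level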
Could not access or read the cpac_outputs.csv " - "resource file:\n{0}\n\nError details {1}\n".format(keys_tsv, e) + f"resource file:\n{keys_tsv}\n\nError details {e}\n" ) raise Exception(err) @@ -385,6 +392,10 @@ def create_output_dict_list( # remove any extra /'s pipeline_output_folder = pipeline_output_folder.rstrip("/") + WFLOGGER.info( + "\n\nGathering the output file paths from %s...", pipeline_output_folder + ) + # this is just to keep the fsl feat config file derivatives entries # nice and lean search_dirs = [] @@ -448,6 +459,10 @@ def create_output_dict_list( new_row_dict["Series"] = series_id new_row_dict["Filepath"] = filepath + WFLOGGER.info( + "%s - %s - %s", unique_id.split("_")[0], series_id, resource_id + ) + if get_motion: # if we're including motion measures power_params_file = find_power_params_file( @@ -475,6 +490,7 @@ def create_output_dict_list( def create_output_df_dict(output_dict_list, inclusion_list=None): + """Create a dictionary of dataframes from the output_dict_list.""" import pandas as pd output_df_dict = {} @@ -491,6 +507,12 @@ def create_output_df_dict(output_dict_list, inclusion_list=None): new_df = new_df[new_df.participant_id.isin(inclusion_list)] if new_df.empty: + WFLOGGER.warning( + "No outputs found for %s for the participants listed in the the group" + " analysis participant list you used. Skipping generating a model for" + " this output.", + unique_resource_id, + ) continue # unique_resource_id is tuple (resource_id,strat_info) @@ -509,6 +531,7 @@ def gather_outputs( get_func=False, derivatives=None, ): + """Gather the output filepaths and their associated information.""" nifti_globs = gather_nifti_globs(pipeline_folder, resource_list, get_func) output_dict_list = create_output_dict_list( @@ -525,9 +548,9 @@ def gather_outputs( def pheno_sessions_to_repeated_measures(pheno_df, sessions_list): - import pandas as pd + """Match participant-session IDs for FEAT repeated measures analysis. - """Take in the selected session names, and match them to the unique + Take in the selected session names, and match them to the unique participant-session IDs appropriately for an FSL FEAT repeated measures analysis. @@ -547,6 +570,7 @@ def pheno_sessions_to_repeated_measures(pheno_df, sessions_list): sub01 ses02 1 0 sub02 ses02 0 1 """ + import pandas as pd # first, check to see if this design matrix setup has already been done # in the pheno CSV file @@ -560,7 +584,7 @@ def pheno_sessions_to_repeated_measures(pheno_df, sessions_list): "Sessions" in pheno_df.columns or "Sessions_column_one" in pheno_df.columns ): for part_id in pheno_df["participant_id"]: - if "participant_{0}".format(part_id) in pheno_df.columns: + if f"participant_{part_id}" in pheno_df.columns: continue break else: @@ -619,21 +643,22 @@ def pheno_sessions_to_repeated_measures(pheno_df, sessions_list): # add new participant ID columns for sub_id in sublist: - new_col = "participant_{0}".format(sub_id) + new_col = f"participant_{sub_id}" pheno_df[new_col] = participant_id_cols[new_col] return pheno_df.astype("object") def pheno_series_to_repeated_measures(pheno_df, series_list, repeated_sessions=False): - import pandas as pd + """Take in the selected series/scans, and create all of the permutations... 
+ + ...of unique participant IDs (participant_site_session) and series/scans + and populate the pheno DataFrame - # take in the selected series/scans, and create all of the permutations - # of unique participant IDs (participant_site_session) and series/scans - # and populate the pheno - # this is so the user does not have to have a specially-formatted - # version of the phenotype CSV for repeated measures; they can just - # enter the regular one + This is so the user does not have to have a specially-formatted version of the + phenotype CSV for repeated measures; they can just enter the regular one. + """ + import pandas as pd # first, check to see if this design matrix setup has already been done # in the pheno CSV file @@ -643,7 +668,7 @@ def pheno_series_to_repeated_measures(pheno_df, series_list, repeated_sessions=F num_partic_cols += 1 if num_partic_cols > 1 and "Series" in pheno_df.columns: for part_id in pheno_df["participant_id"]: - if "participant_{0}".format(part_id) in pheno_df.columns: + if f"participant_{part_id}" in pheno_df.columns: continue break else: @@ -682,16 +707,18 @@ def pheno_series_to_repeated_measures(pheno_df, series_list, repeated_sessions=F def balance_repeated_measures(pheno_df, sessions_list, series_list=None): - # this is for repeated measures only. - # if the user selects a participant list like this: - # sub001_session_1 - # sub001_session_2 - # sub002_session_1 - # sub002_session_2 - # sub003_session_1 - # then have this drop "sub003_session_1", because repeated measures - # requires a uniform balance of repeats - + """Balance the repeated measures design matrix. + + This is for repeated measures only. + If the user selects a participant list like this: + sub001_session_1 + sub001_session_2 + sub002_session_1 + sub002_session_2 + sub003_session_1 + then have this drop "sub003_session_1", because repeated measures + requires a uniform balance of repeats + """ from collections import Counter part_ID_count = Counter(pheno_df["participant_id"]) @@ -706,18 +733,22 @@ if part_ID_count[part_ID] != sessions_x_series: pheno_df = pheno_df[pheno_df.participant_id != part_ID] try: - del pheno_df["participant_%s" % part_ID] - except: + del pheno_df[f"participant_{part_ID}"] + except (KeyError, TypeError): pass dropped_parts.append(part_ID) return pheno_df, dropped_parts -def prep_feat_inputs(group_config_file): - # Preps group analysis run - # config_file: filepath to the C-PAC group-level config file +def prep_feat_inputs(group_config_file: str) -> dict: + """Prep group analysis run. + Parameters + ---------- + group_config_file : str + filepath to the C-PAC group-level config file + """ import os import pandas as pd @@ -729,7 +760,7 @@ def prep_feat_inputs(group_config_file): except Exception as e: err = ( "\n[!] Could not access or read the cpac_outputs.tsv " - "resource file:\n{0}\n\nError details {1}\n".format(keys_tsv, e) + f"resource file:\n{keys_tsv}\n\nError details {e}\n" ) raise Exception(err) @@ -758,16 +789,18 @@ def prep_feat_inputs(group_config_file): inclusion_list = grab_pipeline_dir_subs(pipeline_dir) elif "." in group_model.participant_list: if not os.path.isfile(group_model.participant_list): - raise Exception( + msg = ( "\n[!] 
C-PAC says: Your participant " "inclusion list is not a valid file!\n\n" - "File path: {0}" - "\n".format(group_model.participant_list) - ) - else: - inclusion_list = load_text_file( - group_model.participant_list, "group-level analysis participant " "list" + f"File path: {group_model.participant_list}" + "\n" ) + if os.path.exists(group_model.participant_list): + raise ValueError(msg) + raise FileNotFoundError(msg) + inclusion_list = load_text_file( + group_model.participant_list, "group-level analysis participant list" + ) else: inclusion_list = grab_pipeline_dir_subs(pipeline_dir) @@ -813,7 +846,7 @@ def prep_feat_inputs(group_config_file): "\n\n[!] There are no derivatives listed in the " "derivative_list field of your group analysis " "configuration file.\n\nConfiguration file: " - "{0}\n".format(group_config_file) + f"{group_config_file}\n" ) raise Exception(err) @@ -865,11 +898,12 @@ def prep_feat_inputs(group_config_file): output_df = output_df[output_df["participant_id"].isin(inclusion_list)] elif os.path.isfile(group_model.participant_list): inclusion_list = load_text_file( - group_model.participant_list, "group-level analysis " "participant list" + group_model.participant_list, "group-level analysis participant list" ) output_df = output_df[output_df["participant_id"].isin(inclusion_list)] else: - raise Exception("\nCannot read group-level analysis participant " "list.\n") + msg = "\nCannot read group-level analysis participant list.\n" + raise Exception(msg) new_pheno_df = pheno_df.copy() @@ -891,12 +925,13 @@ def prep_feat_inputs(group_config_file): output_df["participant_id"] = new_sublist_subs break else: - raise Exception( + msg = ( "the participant IDs in your group " "analysis participant list and the " "participant IDs in your phenotype file " "do not match" ) + raise Exception(msg) repeated_measures = False repeated_sessions = False @@ -983,7 +1018,7 @@ def prep_feat_inputs(group_config_file): series = "repeated_measures_multiple_series" if "session" in output_df: for ses_df_tuple in new_pheno_df.groupby("Sessions"): - session = "ses-{0}".format(ses_df_tuple[0]) + session = f"ses-{ses_df_tuple[0]}" ses_df = ses_df_tuple[1] # send it in @@ -1094,7 +1129,7 @@ def prep_feat_inputs(group_config_file): # multiple sessions? if "Sessions" in series_df: for ses_df_tuple in series_df.groupby("Sessions"): - session = "ses-{0}".format(ses_df_tuple[0]) + session = f"ses-{ses_df_tuple[0]}" ses_df = ses_df_tuple[1] newer_ses_pheno_df = pd.merge( newer_pheno_df, ses_df, how="inner", on=["participant_id"] @@ -1141,6 +1176,7 @@ def prep_feat_inputs(group_config_file): def build_feat_models(group_config_file): + """Build FSL's FEAT models for group analysis.""" import os from CPAC.pipeline.cpac_ga_model_generator import build_feat_model @@ -1173,11 +1209,11 @@ def build_feat_models(group_config_file): if os.path.isfile(empty_csv): return 0 - else: - return -1 + return -1 def run_feat(group_config_file, feat=True): + """Run FSL's FEAT tool for group analysis.""" from multiprocessing import Process import os @@ -1203,8 +1239,8 @@ def run_feat(group_config_file, feat=True): out_dir, "cpac_group_analysis", "FSL_FEAT", - "{0}".format(pipeline_name), - "group_model_{0}".format(model_name), + f"{pipeline_name}", + f"group_model_{model_name}", ) custom_contrasts_csv = os.path.join(model_dir, "contrasts.csv") @@ -1257,9 +1293,9 @@ def run_feat(group_config_file, feat=True): if len(models) == 0: err = ( "\n\n[!] 
C-PAC says: Cannot find the FSL-FEAT/Randomise model " - "files.\n\nI am looking here:\n{0}\n\nIf that doesn't sound " + f"files.\n\nI am looking here:\n{model_dir}\n\nIf that doesn't sound " "right, double-check your group configuration file.\n\nDid you " - "build the model beforehand?\n\n".format(model_dir) + "build the model beforehand?\n\n" ) raise Exception(err) @@ -1317,8 +1353,17 @@ def run_feat(group_config_file, feat=True): f_test = False if not con: - "\n\n################## MODEL NOT BEING INCLUDED ###########" "#######" "\n\n[!] C-PAC says: There is a mismatch between the design " "matrix and contrasts matrix for this model:\n\n" "Derivative: {0}\nSession: {1}\nScan: {2}\nPreprocessing " "strategy:\n {3}\n\nThe model is not proceeding into the " "FSL-FEAT FLAME run.\n\n" "#########################################################" "\n".format( - id_tuple[0], id_tuple[1], id_tuple[2], id_tuple[3] + WFLOGGER.warning( + "\n\n################## MODEL NOT BEING INCLUDED ##################" + "\n\n[!] C-PAC says: There is a mismatch between the design matrix and" + " contrasts matrix for this model:\n\nDerivative: %s\nSession: %s" + "\nScan: %s\nPreprocessing strategy:\n %s\n\nThe model is not" + " proceeding into the FSL-FEAT FLAME run." + "\n\n#########################################################\n", + id_tuple[0], + id_tuple[1], + id_tuple[2], + id_tuple[3], ) continue @@ -1386,6 +1431,7 @@ def run_cwas_group( z_score, inclusion=None, ): + """Run a group-level CWAS analysis.""" import os from CPAC.cwas.pipeline import create_cwas @@ -1407,7 +1453,7 @@ def run_cwas_group( inclusion_list = None if inclusion: - inclusion_list = load_text_file(inclusion, "MDMR participant " "inclusion list") + inclusion_list = load_text_file(inclusion, "MDMR participant inclusion list") output_df_dct = gather_outputs( pipeline_dir, @@ -1446,7 +1492,7 @@ def run_cwas_group( plugin = "MultiProc" cwas_wf = create_cwas( - name="MDMR_{0}".format(df_scan), + name=f"MDMR_{df_scan}", working_dir=working_dir, crash_dir=crash_dir, ) @@ -1462,6 +1508,7 @@ def run_cwas_group( def run_cwas(pipeline_config): + """Run CWAS.""" import os import yaml @@ -1511,15 +1558,20 @@ def run_cwas(pipeline_config): ) -def find_other_res_template(template_path, new_resolution): - """ - Find the same template/standard file in another resolution, if it - exists. - template_path: file path to the template NIfTI file. +def find_other_res_template(template_path: str, new_resolution: int) -> str: + """Find the same template/standard file in another resolution, if it exists. - new_resolution: (int) the resolution of the template file you need - NOTE: Makes an assumption regarding the filename format of the files. + Parameters + ---------- + template_path : str + file path to the template NIfTI file. + new_resolution : int + the resolution of the template file you need + + Notes + ----- + Makes an assumption regarding the filename format of the files. """ # TODO: this is assuming there is a mm resolution in the file path - not # TODO: robust to varying templates - look into alternatives @@ -1527,29 +1579,36 @@ def find_other_res_template(template_path, new_resolution): ref_file = None if "mm" in template_path: - template_parts = template_path.rsplit("mm", 1) - - if len(template_parts) < 2: + parts = {} + try: + parts["left"], parts["right"] = template_path.rsplit("mm", 1) + except ValueError: # TODO: better message - raise Exception("no resolution in the file path!") + msg = "no resolution in the file path!" 
+ raise Exception(msg) - template_parts[0] = str(new_resolution).join( - template_parts[0].rsplit(template_parts[0][-1], 1) + parts["left"] = str(new_resolution).join( + parts["left"].rsplit(parts["left"][-1], 1) ) - ref_file = "{0}{1}".format(template_parts[0], template_parts[1]) + ref_file = f"{parts['left']}{parts['right']}" elif "${resolution_for_func_preproc}" in template_path: ref_file = template_path.replace( - "${resolution_for_func_preproc}", "{0}mm".format(new_resolution) + "${resolution_for_func_preproc}", f"{new_resolution}mm" ) if ref_file: - pass + WFLOGGER.info( + "\nAttempting to find %smm version of the template:\n%s\n\n", + new_resolution, + ref_file, + ) return ref_file def check_cpac_output_image(image_path, reference_path, out_dir=None, roi_file=False): + """Check if the image needs to be resampled. If so, return the command.""" import os import nibabel as nib @@ -1580,42 +1639,67 @@ def check_cpac_output_image(image_path, reference_path, out_dir=None, roi_file=F # check: do we even need to resample? if int(image_nb.header.get_zooms()[0]) != int(ref_nb.header.get_zooms()[0]): + WFLOGGER.info( + "Input image resolution is %smm\nTemplate image resolution is %smm\n", + image_nb.header.get_zooms()[0], + ref_nb.header.get_zooms()[0], + ) resample = True if image_nb.shape != ref_nb.shape: + WFLOGGER.info( + "Input image shape is %s\nTemplate image shape is %s\n", + image_nb.shape, + ref_nb.shape, + ) resample = True if resample: if not os.path.isdir(out_path.replace(os.path.basename(out_path), "")): try: os.makedirs(out_path.replace(os.path.basename(out_path), "")) - except: + except (OSError, TypeError) as os_error: # TODO: better message - raise Exception("couldn't make the dirs!") - + msg = "couldn't make the dirs!" + raise OSError(msg) from os_error + + WFLOGGER.info( + "Resampling input image:\n%s\n\n..to this reference:\n%s\n\n..and writing" + " this file here:\n%s\n", + image_path, + reference_path, + out_path, + ) cmd = ["flirt", "-in", image_path, "-ref", reference_path, "-out", out_path] if roi_file: cmd.append("-interp") cmd.append("nearestneighbour") return cmd - else: - return resample + return resample def resample_cpac_output_image(cmd_args): + """Run resampling command and return the output file path.""" import subprocess - subprocess.check_output(cmd_args) + WFLOGGER.info("Running:\n%s\n\n", " ".join(cmd_args)) + + flag = "resampled_input_images" for arg in cmd_args: - if "resampled_input_images" in arg: + if flag in arg: out_file = arg - - return out_file + subprocess.check_output(cmd_args) + return out_file + msg = f"Missing required argument '{flag}'" + raise ValueError(msg) def launch_PyBASC(pybasc_config): + """Run PyBASC.""" import subprocess + WFLOGGER.info("Running PyBASC with configuration file:\n%s", pybasc_config) + cmd_args = ["PyBASC", pybasc_config] return subprocess.check_output(cmd_args) @@ -1734,17 +1818,14 @@ def run_basc(pipeline_config): # did that actually work? if not os.path.isfile(ref_file): # TODO: better message - raise Exception( - "\n[!] The reference file could not be found.\nPath: " "{0}\n".format( - ref_file - ) - ) + msg = "\n[!] 
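For reference, this is the kind of FSL FLIRT call that check_cpac_output_image assembles when resampling is needed; the paths are hypothetical and only the flags come from the code above:

    image_path = "/outputs/sub-01/roi_mask.nii.gz"  # hypothetical input
    reference_path = "/templates/template_3mm.nii.gz"  # hypothetical reference
    out_path = "/outputs/resampled_input_images/sub-01/roi_mask.nii.gz"

    cmd = ["flirt", "-in", image_path, "-ref", reference_path, "-out", out_path]
    roi_file = True
    if roi_file:
        # nearest-neighbour interpolation keeps integer ROI labels intact
        cmd += ["-interp", "nearestneighbour"]
    print(" ".join(cmd))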
The reference file could not be found.\nPath: " f"{ref_file}\n" + raise FileNotFoundError(msg) working_dir = os.path.join( working_dir, "cpac_group_analysis", "PyBASC", - "{0}mm_resolution".format(basc_resolution), + f"{basc_resolution}mm_resolution", "working_dir", ) @@ -1781,7 +1862,7 @@ def run_basc(pipeline_config): output_dir, "cpac_group_analysis", "PyBASC", - "{0}mm_resolution".format(basc_resolution), + f"{basc_resolution}mm_resolution", os.path.basename(pipeline_dir), ) working_dir = os.path.join(working_dir, os.path.basename(pipeline_dir)) @@ -1791,7 +1872,7 @@ def run_basc(pipeline_config): if basc_inclusion: inclusion_list = load_text_file( - basc_inclusion, "BASC participant" " inclusion list" + basc_inclusion, "BASC participant inclusion list" ) if "none" in basc_scan_inclusion.lower(): @@ -1885,6 +1966,11 @@ def run_basc(pipeline_config): basc_config_dct["subject_file_list"] = func_paths basc_config_outfile = os.path.join(scan_working_dir, "PyBASC_config.yml") + WFLOGGER.info( + "\nWriting PyBASC configuration file for %s scan in\n%s", + df_scan, + basc_config_outfile, + ) with open(basc_config_outfile, "wt") as f: noalias_dumper = yaml.dumper.SafeDumper noalias_dumper.ignore_aliases = lambda self, data: True @@ -1912,6 +1998,7 @@ def run_isc_group( roi_inclusion=None, num_cpus=1, ): + """Run the ISC pipeline for group-level analysis.""" import os from CPAC.isc.pipeline import create_isc, create_isfc @@ -1959,6 +2046,9 @@ def run_isc_group( if roi_label in _: break else: + WFLOGGER.warning( + "ROI label '%s' not found in\n%s/%s\n", roi_label, derivative, _ + ) continue df_dct = {} @@ -2035,6 +2125,7 @@ def run_isc_group( def run_isc(pipeline_config): + """Run the ISC pipeline.""" import os import yaml @@ -2070,7 +2161,7 @@ def run_isc(pipeline_config): permutations = pipeconfig_dct.get("isc_permutations", 1000) std_filter = pipeconfig_dct.get("isc_level_voxel_std_filter", None) - if std_filter == 0.0: + if std_filter == 0.0: # noqa: PLR2004 std_filter = None levels = [] @@ -2084,6 +2175,10 @@ def run_isc(pipeline_config): return if not isc and not isfc: + WFLOGGER.info( + "\nISC and ISFC are not enabled to run in the group-level analysis" + " configuration YAML file, and will not run.\n" + ) return pipeline_dirs = [] @@ -2094,7 +2189,13 @@ def run_isc(pipeline_config): pipeline_dirs.append(os.path.join(pipeline_dir, dirname)) if not pipeline_dirs: - pass + WFLOGGER.error( + "\nNo pipeline output directories found- make sure your 'pipeline_dir'" + " field in the group configuration YAML file is pointing to a C-PAC" + " pipeline output directory populated with a folder or folders that begin" + " with the 'pipeline_' prefix.\n\nPipeline directory provided:\n%s\n", + pipeline_dir, + ) for pipeline in pipeline_dirs: run_isc_group( @@ -2114,6 +2215,7 @@ def run_isc(pipeline_config): def run_qpp(group_config_file): + """Run the QPP pipeline.""" from CPAC.qpp.pipeline import create_qpp c = load_config_yml(group_config_file) @@ -2144,7 +2246,7 @@ def run_qpp(group_config_file): os.makedirs(out_dir) os.makedirs(working_dir) os.makedirs(crash_dir) - except: + except OSError: pass outputs = gather_outputs( @@ -2167,7 +2269,8 @@ def run_qpp(group_config_file): else: qpp_stratification = [] - for (resource_id, strat_info), output_df in outputs.items(): + for _output_df in outputs.values(): + output_df = _output_df if c["qpp"]["session_inclusion"]: output_df = output_df[ output_df["Sessions"].isin(c["qpp"]["session_inclusion"]) @@ -2180,7 +2283,8 @@ def run_qpp(group_config_file): else: 
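The PyBASC configuration above is written out with an alias-suppressing SafeDumper; a small stand-alone sketch of that technique (the keys here are invented, not PyBASC's schema):

    import yaml

    shared = ["/data/sub-01_bold.nii.gz", "/data/sub-02_bold.nii.gz"]
    config = {"subject_file_list": shared, "reference_file_list": shared}

    noalias_dumper = yaml.dumper.SafeDumper
    noalias_dumper.ignore_aliases = lambda self, data: True  # emit plain lists instead of &id001/*id001 aliases
    print(yaml.dump(config, default_flow_style=False, Dumper=noalias_dumper))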
output_df_groups = [([], output_df)] - for group_id, output_df_group in output_df_groups: + for _group_id, _output_df_group in output_df_groups: + group_id, output_df_group = _group_id, _output_df_group group = list(zip(qpp_stratification, group_id)) group_id = "_".join( @@ -2232,6 +2336,7 @@ def run_qpp(group_config_file): def manage_processes(procss, output_dir, num_parallel=1): + """Manage multiple processes in parallel.""" import os # start kicking it off @@ -2267,6 +2372,7 @@ def manage_processes(procss, output_dir, num_parallel=1): else: for job in jobQueue: if not job.is_alive(): + WFLOGGER.warning("found dead job %s", job) loc = jobQueue.index(job) del jobQueue[loc] procss[idx].start() @@ -2277,9 +2383,11 @@ def manage_processes(procss, output_dir, num_parallel=1): def run(config_file): - # this runs all group analyses, and this function only really exists for - # the "Run Group-Level Analysis" command on the GUI + """Run all group analyses. + This function only really exists for + the "Run Group-Level Analysis" command on the GUI + """ # get MAIN pipeline config loaded c = load_config_yml(config_file) diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index ceb8b222e0..0e9c5698e2 100644 --- a/CPAC/pipeline/cpac_pipeline.py +++ b/CPAC/pipeline/cpac_pipeline.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2023 C-PAC Developers +# Copyright (C) 2012-2024 C-PAC Developers # This file is part of C-PAC. @@ -14,6 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . +"""Build a pipeline for C-PAC to run on a subject.""" import copy import csv import faulthandler @@ -27,7 +28,7 @@ import yaml import nipype from nipype import config, logging -from flowdump import WorkflowJSONMeta, save_workflow_json +from flowdump import save_workflow_json, WorkflowJSONMeta from indi_aws import aws_utils, fetch_creds import CPAC @@ -108,10 +109,6 @@ ) from CPAC.network_centrality.pipeline import network_centrality from CPAC.nuisance.nuisance import ( - ICA_AROMA_ANTsEPIreg, - ICA_AROMA_ANTsreg, - ICA_AROMA_FSLEPIreg, - ICA_AROMA_FSLreg, choose_nuisance_blocks, erode_mask_bold, erode_mask_boldCSF, @@ -121,6 +118,10 @@ erode_mask_GM, erode_mask_T1w, erode_mask_WM, + ICA_AROMA_ANTsEPIreg, + ICA_AROMA_ANTsreg, + ICA_AROMA_FSLEPIreg, + ICA_AROMA_FSLreg, ingress_regressors, nuisance_regression_template, ) @@ -128,7 +129,7 @@ # pylint: disable=wrong-import-order from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import check_outputs -from CPAC.pipeline.engine import NodeBlock, initiate_rpool +from CPAC.pipeline.engine import initiate_rpool, NodeBlock from CPAC.pipeline.nipype_pipeline_engine.plugins import ( LegacyMultiProcPlugin, MultiProcPlugin, @@ -171,7 +172,7 @@ warp_wholeheadT1_to_template, ) from CPAC.reho.reho import reho, reho_space_template -from CPAC.sca.sca import SCA_AVG, dual_regression, multiple_regression +from CPAC.sca.sca import dual_regression, multiple_regression, SCA_AVG from CPAC.seg_preproc.seg_preproc import ( tissue_seg_ants_prior, tissue_seg_EPI_template_based, @@ -188,12 +189,14 @@ from CPAC.utils import Configuration, set_subject from CPAC.utils.docs import version_report from CPAC.utils.monitoring import ( - LOGTAIL, - WARNING_FREESURFER_OFF_WITH_DATA, + FMLOGGER, getLogger, log_nodes_cb, log_nodes_initial, + LOGTAIL, set_up_logger, + WARNING_FREESURFER_OFF_WITH_DATA, + WFLOGGER, ) from CPAC.utils.monitoring.draw_gantt_chart import resource_report from 
CPAC.utils.trimmer import the_trimmer @@ -205,7 +208,6 @@ from CPAC.utils.workflow_serialization import cpac_flowdump_serializer from CPAC.vmhc.vmhc import smooth_func_vmhc, vmhc, warp_timeseries_to_sym_template -logger = getLogger("nipype.workflow") faulthandler.enable() # config.enable_debug_mode() @@ -221,8 +223,7 @@ def run_workflow( plugin_args=None, test_config=False, ): - """ - Function to prepare and, optionally, run the C-PAC workflow. + """Prepare and, optionally, run the C-PAC workflow. Parameters ---------- @@ -251,12 +252,13 @@ def run_workflow( from CPAC.utils.datasource import bidsier_prefix if plugin is not None and not isinstance(plugin, str): - raise TypeError( + msg = ( 'CPAC.pipeline.cpac_pipeline.run_workflow requires a ' 'string for the optional "plugin" argument, but a ' f'{getattr(type(plugin), "__name__", str(type(plugin)))} ' 'was provided.' ) + raise TypeError(msg) # Assure that changes on config will not affect other parts c = copy.copy(c) @@ -273,7 +275,7 @@ def run_workflow( overwrite_existing=True, ) if c.pipeline_setup["Debugging"]["verbose"]: - set_up_logger("engine", level="debug", log_dir=log_dir, mock=True) + set_up_logger("CPAC.engine", level="debug", log_dir=log_dir, mock=True) config.update_config( { @@ -330,7 +332,7 @@ def run_workflow( # TODO: solve the UNet model hanging issue during MultiProc if "UNet" in c.anatomical_preproc["brain_extraction"]["using"]: c.pipeline_setup["system_config"]["max_cores_per_participant"] = 1 - logger.info( + WFLOGGER.info( "\n\n[!] LOCKING CPUs PER PARTICIPANT TO 1 FOR U-NET " "MODEL.\n\nThis is a temporary measure due to a known " "issue preventing Nipype's parallelization from running " @@ -350,10 +352,10 @@ def run_workflow( input_creds_path = os.path.abspath(creds_path) else: err_msg = ( - 'Credentials path: "%s" for subject "%s" was not ' - "found. Check this path and try again." % (creds_path, subject_id) + f'Credentials path: "{creds_path}" for subject "{subject_id}" was' + " not found. Check this path and try again." 
) - raise Exception(err_msg) + raise FileNotFoundError(err_msg) else: input_creds_path = None except KeyError: @@ -362,7 +364,7 @@ def run_workflow( # TODO enforce value with schema validation try: encrypt_data = bool(config.pipeline_setup["Amazon-AWS"]["s3_encryption"]) - except: + except (KeyError, TypeError, ValueError): encrypt_data = False information = """ @@ -400,7 +402,7 @@ def run_workflow( {output_check} """ - logger.info( + WFLOGGER.info( "%s", information.format( run_command=" ".join(["run", *sys.argv[1:]]), @@ -412,7 +414,7 @@ def run_workflow( ants_threads=c.pipeline_setup["system_config"]["num_ants_threads"], max_cores=max_core_usage, random_seed=( - " Random seed: %s" % c.pipeline_setup["system_config"]["random_seed"] + f" Random seed: {c.pipeline_setup['system_config']['random_seed']}" ) if c.pipeline_setup["system_config"]["random_seed"] is not None else "", @@ -466,9 +468,9 @@ def run_workflow( set_up_random_state_logger(log_dir) try: - workflow = build_workflow(subject_id, sub_dict, c, p_name, num_ants_cores) + workflow = build_workflow(subject_id, sub_dict, c, p_name) except Exception as exception: - logger.exception("Building workflow failed") + WFLOGGER.exception("Building workflow failed") raise exception wf_graph = c["pipeline_setup", "log_directory", "graphviz", "entire_workflow"] @@ -484,10 +486,11 @@ def run_workflow( simple_form=wf_graph.get("simple_form", True), ) except Exception as exception: - raise RuntimeError( + msg = ( f"Failed to visualize {p_name} (" f"{graph2use}, {graph_format})" - ) from exception + ) + raise RuntimeError(msg) from exception workflow_meta = WorkflowJSONMeta(pipeline_name=p_name, stage="pre") save_workflow_json( @@ -498,7 +501,7 @@ def run_workflow( ) if test_config: - logger.info( + WFLOGGER.info( "This has been a test of the pipeline configuration " "file, the pipeline was built successfully, but was " "not run" @@ -512,7 +515,7 @@ def run_workflow( # with open(os.path.join(working_dir, 'resource_pool.pkl'), 'wb') as f: # pickle.dump(strat_list, f) - # if c.pipeline_setup['working_directory']['regenerate_outputs'] is True: + # if c.pipeline_setup['working_directory']['regenerate_outputs']: # erasable = list(find_files(working_dir, '*sink*')) + \ # list(find_files(working_dir, '*link*')) + \ @@ -525,7 +528,7 @@ def run_workflow( # shutil.rmtree(f) if hasattr(c, "trim") and c.trim: - logger.warning( + WFLOGGER.warning( """ Trimming is an experimental feature, and if used wrongly, it can lead to unreproducible results. @@ -577,10 +580,11 @@ def run_workflow( # Add status callback function that writes in callback log nipype_version = REQUIREMENTS["nipype"] if nipype.__version__ != nipype_version: - logger.warning( - "This version of Nipype may not be compatible " - f"with CPAC v{CPAC.__version__}, please " - f"install Nipype version {nipype_version}\n" + WFLOGGER.warning( + "This version of Nipype may not be compatible with CPAC v%s," + " please install Nipype version %s\n", + CPAC.__version__, + nipype_version, ) if plugin_args["n_procs"] == 1: @@ -594,7 +598,7 @@ def run_workflow( # Actually run the pipeline now, for the current subject workflow_result = workflow.run(plugin=plugin, plugin_args=plugin_args) except UnicodeDecodeError: - raise EnvironmentError( + msg = ( "C-PAC migrated from Python 2 to Python 3 in v1.6.2 (see " "release notes). Your working directory contains Python 2 " "pickles, probably from an older version of C-PAC. 
If you " @@ -609,6 +613,7 @@ def run_workflow( "utils repickle /path/to/working_dir\n\n" "before running C-PAC >=v1.6.2" ) + raise EnvironmentError(msg) # PyPEER kick-off # if c.PyPEER['run']: @@ -621,9 +626,7 @@ def run_workflow( # Dump subject info pickle file to subject log dir subject_info["status"] = "Completed" - subject_info_file = os.path.join( - log_dir, "subject_info_%s.pkl" % subject_id - ) + subject_info_file = os.path.join(log_dir, f"subject_info_{subject_id}.pkl") with open(subject_info_file, "wb") as info: pickle.dump(list(subject_info), info) @@ -658,7 +661,7 @@ def run_workflow( timing_temp_file_path = os.path.join( c.pipeline_setup["log_directory"]["path"], - "%s_pipeline_timing.tmp" % unique_pipeline_id, + f"{unique_pipeline_id}_pipeline_timing.tmp", ) if not os.path.isfile(timing_temp_file_path): @@ -709,8 +712,8 @@ def run_workflow( with open( os.path.join( c.pipeline_setup["log_directory"]["path"], - "cpac_individual_timing_%s.csv" - % c.pipeline_setup["pipeline_name"], + "cpac_individual_timing" + f"_{c.pipeline_setup['pipeline_name']}.csv", ), "a", ) as timeCSV, open( @@ -729,7 +732,7 @@ def run_workflow( if "Start_Time" in line: headerExists = True - if headerExists is False: + if not headerExists: timeWriter.writerow(timeHeader) timeWriter.writerow(pipelineTimeDict) @@ -771,7 +774,7 @@ def run_workflow( except Exception as exc: err_msg = "Unable to upload CPAC log files in: %s.\nError: %s" - logger.error(err_msg, log_dir, exc) + FMLOGGER.error(err_msg, log_dir, exc) except Exception: import traceback @@ -794,9 +797,9 @@ def run_workflow( finally: if workflow: if os.path.exists(cb_log_filename): - resource_report(cb_log_filename, num_cores_per_sub, logger) + resource_report(cb_log_filename, num_cores_per_sub, WFLOGGER) - logger.info( + WFLOGGER.info( "%s", execution_info.format( workflow=workflow.name, @@ -841,13 +844,14 @@ def remove_workdir(wdpath: str) -> None: """ try: if os.path.exists(wdpath): - logger.info("Removing working dir: %s", wdpath) + FMLOGGER.info("Removing working dir: %s", wdpath) shutil.rmtree(wdpath) except (FileNotFoundError, PermissionError): - logger.warning("Could not remove working directory %s", wdpath) + FMLOGGER.warning("Could not remove working directory %s", wdpath) def initialize_nipype_wf(cfg, sub_data_dct, name=""): + """Initialize a new nipype workflow.""" if name: name = f"_{name}" @@ -865,34 +869,36 @@ def initialize_nipype_wf(cfg, sub_data_dct, name=""): def load_cpac_pipe_config(pipe_config): - # Load in pipeline config file + """Load in pipeline config file.""" config_file = os.path.realpath(pipe_config) try: if not os.path.exists(config_file): raise IOError - else: - cfg = Configuration(yaml.safe_load(open(config_file, "r"))) + cfg = Configuration(yaml.safe_load(open(config_file, "r"))) except IOError: raise except yaml.parser.ParserError as e: - error_detail = '"%s" at line %d' % (e.problem, e.problem_mark.line) - raise Exception( - "Error parsing config file: {0}\n\n" + error_detail = f'"{e.problem}" at line {e.problem_mark.line}' + msg = ( + f"Error parsing config file: {config_file}\n\n" "Error details:\n" - " {1}" - "\n\n".format(config_file, error_detail) + f" {error_detail}" + "\n\n" ) + raise yaml.parser.ParserError(msg) from e except Exception as e: - raise Exception( - "Error parsing config file: {0}\n\n" + msg = ( + f"Error parsing config file: {config_file}\n\n" "Error details:\n" - " {1}" - "\n\n".format(config_file, e) + f" {e}" + "\n\n" ) + raise yaml.parser.ParserError(msg) from e return cfg def 
build_anat_preproc_stack(rpool, cfg, pipeline_blocks=None): + """Build the anatomical preprocessing stack.""" if not pipeline_blocks: pipeline_blocks = [] @@ -1050,6 +1056,7 @@ def build_anat_preproc_stack(rpool, cfg, pipeline_blocks=None): def build_T1w_registration_stack(rpool, cfg, pipeline_blocks=None): + """Build the T1w registration pipeline blocks.""" if not pipeline_blocks: pipeline_blocks = [] @@ -1079,6 +1086,7 @@ def build_T1w_registration_stack(rpool, cfg, pipeline_blocks=None): def build_segmentation_stack(rpool, cfg, pipeline_blocks=None): + """Build the tissue segmentation pipeline blocks.""" if not pipeline_blocks: pipeline_blocks = [] @@ -1118,7 +1126,7 @@ def build_segmentation_stack(rpool, cfg, pipeline_blocks=None): def list_blocks(pipeline_blocks, indent=None): - """Function to list node blocks line by line. + """List node blocks line by line. Parameters ---------- @@ -1158,10 +1166,9 @@ def list_blocks(pipeline_blocks, indent=None): def connect_pipeline(wf, cfg, rpool, pipeline_blocks): - logger.info( - "\n".join( - ["Connecting pipeline blocks:", list_blocks(pipeline_blocks, indent=1)] - ) + """Connect the pipeline blocks to the workflow.""" + WFLOGGER.info( + "Connecting pipeline blocks:\n%s", list_blocks(pipeline_blocks, indent=1) ) previous_nb = None @@ -1171,7 +1178,7 @@ def connect_pipeline(wf, cfg, rpool, pipeline_blocks): wf = nb.connect_block(wf, cfg, rpool) except LookupError as e: if nb.name == "freesurfer_postproc": - logger.warning(WARNING_FREESURFER_OFF_WITH_DATA) + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA) continue previous_nb_str = ( @@ -1195,7 +1202,7 @@ def connect_pipeline(wf, cfg, rpool, pipeline_blocks): f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", ) if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("engine") + verbose_logger = getLogger("CPAC.engine") verbose_logger.debug(e.args[0]) verbose_logger.debug(rpool) raise @@ -1204,11 +1211,12 @@ def connect_pipeline(wf, cfg, rpool, pipeline_blocks): return wf -def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores=1): +def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): + """Build a C-PAC workflow for a single subject.""" from CPAC.utils.datasource import gather_extraction_maps # Workflow setup - wf = initialize_nipype_wf(cfg, sub_dict) + wf = initialize_nipype_wf(cfg, sub_dict, name=pipeline_name) # Extract credentials path if it exists try: @@ -1218,10 +1226,10 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores input_creds_path = os.path.abspath(creds_path) else: err_msg = ( - 'Credentials path: "%s" for subject "%s" was not ' - "found. Check this path and try again." % (creds_path, subject_id) + f'Credentials path: "{creds_path}" for subject "{subject_id}" was' + " not found. Check this path and try again." 
) - raise Exception(err_msg) + raise FileNotFoundError(err_msg) else: input_creds_path = None except KeyError: @@ -1229,9 +1237,9 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores cfg.pipeline_setup["input_creds_path"] = input_creds_path - """""" """""" """""" """""" """""" """""" """""" """""" """ - PREPROCESSING - """ """""" """""" """""" """""" """""" """""" """""" """""" + # """"""""""""""""""""""""""""""""""""""""""""""""""" + # PREPROCESSING + # """"""""""""""""""""""""""""""""""""""""""""""""""" wf, rpool = initiate_rpool(wf, cfg, sub_dict) @@ -1283,7 +1291,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores if "phasediff" in fmap_keys or "phase1" in fmap_keys: if "magnitude" in fmap_keys or "magnitude1" in fmap_keys: distcor_blocks.append(distcor_phasediff_fsl_fugue) - if len(fmap_keys) == 2: + if len(fmap_keys) == 2: # noqa: PLR2004 for key in fmap_keys: if "epi_" not in key: break @@ -1426,7 +1434,6 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores if rpool.check_rpool(func): apply_func_warp["T1"] = False - target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"] target_space_alff = cfg.amplitude_low_frequency_fluctuation["target_space"] target_space_reho = cfg.regional_homogeneity["target_space"] @@ -1457,9 +1464,6 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores pipeline_blocks += [func_despike_template] - if "Template" in target_space_alff and target_space_nuis == "native": - pipeline_blocks += [warp_denoiseNofilt_to_T1template] - template = cfg.registration_workflows["functional_registration"][ "func_registration_to_template" ]["target_template"]["using"] @@ -1597,7 +1601,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores missing_key = lookup_error.args[0].split("': ")[-1] for errorstring in [ "[!] C-PAC says: The listed resource is not in the resource pool:", - "[!] C-PAC says: None of the listed resources are in the resource " "pool:", + "[!] C-PAC says: None of the listed resources are in the resource pool:", "[!] C-PAC says: None of the listed resources in the node block " "being connected exist in the resource pool.\n\nResources:", ]: @@ -1623,7 +1627,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores # TODO enforce value with schema validation try: bool(cfg.pipeline_setup["Amazon-AWS"]["s3_encryption"]) - except: + except (KeyError, TypeError, ValueError): pass # TODO enforce value with schema validation @@ -1644,16 +1648,16 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None, num_ants_cores ) if not s3_write_access: - raise Exception("Not able to write to bucket!") + msg = "Not able to write to bucket!" + raise PermissionError(msg) except Exception as e: if cfg.pipeline_setup["output_directory"]["path"].lower().startswith("s3://"): err_msg = ( - "There was an error processing credentials or " - "accessing the S3 bucket. Check and try again.\n" - "Error: %s" % e + "There was an error processing credentials or accessing the S3 bucket." 
+ f" Check and try again.\nError: {e}" ) - raise Exception(err_msg) + raise ConnectionError(err_msg) # Collect all pipeline variants and write to output directory rpool.gather_pipes(wf, cfg) diff --git a/CPAC/pipeline/cpac_randomise_pipeline.py b/CPAC/pipeline/cpac_randomise_pipeline.py index 5eb562c4d6..ded1719477 100644 --- a/CPAC/pipeline/cpac_randomise_pipeline.py +++ b/CPAC/pipeline/cpac_randomise_pipeline.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -22,7 +22,7 @@ from CPAC.pipeline.cpac_group_runner import load_config_yml from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin from CPAC.utils.interfaces.fsl import Merge as fslMerge -from CPAC.utils.monitoring import log_nodes_cb +from CPAC.utils.monitoring import log_nodes_cb, WFLOGGER def load_subject_file(group_config_path): @@ -50,7 +50,9 @@ def randomise_merged_mask(s_paths): def prep_randomise_workflow(c, subject_infos): + WFLOGGER.info("Preparing Randomise workflow") p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos)) + WFLOGGER.info("Subjects %s", s_ids) wf = pe.Workflow(name="randomise_workflow") wf.base_dir = c.pipeline_setup["working_directory"]["path"] diff --git a/CPAC/pipeline/cpac_runner.py b/CPAC/pipeline/cpac_runner.py index 8ba4f60e28..428a5aa685 100644 --- a/CPAC/pipeline/cpac_runner.py +++ b/CPAC/pipeline/cpac_runner.py @@ -1,20 +1,19 @@ -"""Copyright (C) 2022 C-PAC Developers. +# Copyright (C) 2022-2024 C-PAC Developers. -This file is part of C-PAC. +# This file is part of C-PAC. -C-PAC is free software: you can redistribute it and/or modify it under -the terms of the GNU Lesser General Public License as published by the -Free Software Foundation, either version 3 of the License, or (at your -option) any later version. +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. -C-PAC is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. -You should have received a copy of the GNU Lesser General Public -License along with C-PAC. If not, see . -""" +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
from multiprocessing import Process import os from time import strftime @@ -24,10 +23,10 @@ import yaml from CPAC.longitudinal_pipeline.longitudinal_workflow import anat_longitudinal_wf -from CPAC.utils.configuration import Configuration, check_pname, set_subject +from CPAC.utils.configuration import check_pname, Configuration, set_subject from CPAC.utils.configuration.yaml_template import upgrade_pipeline_to_1_8 from CPAC.utils.ga import track_run -from CPAC.utils.monitoring import failed_to_start, log_nodes_cb +from CPAC.utils.monitoring import failed_to_start, log_nodes_cb, WFLOGGER # Run condor jobs @@ -38,9 +37,8 @@ def run_condor_jobs(c, config_file, subject_list_file, p_name): try: sublist = yaml.safe_load(open(os.path.realpath(subject_list_file), "r")) except: - raise Exception( - "Subject list is not in proper YAML format. Please check your file" - ) + msg = "Subject list is not in proper YAML format. Please check your file" + raise Exception(msg) cluster_files_dir = os.path.join(os.getcwd(), "cluster_files") subject_bash_file = os.path.join( @@ -96,6 +94,7 @@ def run_condor_jobs(c, config_file, subject_list_file, p_name): f.close() # commands.getoutput('chmod +x %s' % subject_bash_file ) + WFLOGGER.info(subprocess.getoutput(f"condor_submit {subject_bash_file}")) # Create and run script for CPAC to run on cluster @@ -117,16 +116,14 @@ def run_cpac_on_cluster(config_file, subject_list_file, cluster_files_dir): pipeline_dict = yaml.safe_load(open(os.path.realpath(config_file), "r")) pipeline_config = Configuration(pipeline_dict) except: - raise Exception( - "Pipeline config is not in proper YAML format. " "Please check your file" - ) + msg = "Pipeline config is not in proper YAML format. Please check your file" + raise Exception(msg) # Load in the subject list try: sublist = yaml.safe_load(open(os.path.realpath(subject_list_file), "r")) except: - raise Exception( - "Subject list is not in proper YAML format. " "Please check your file" - ) + msg = "Subject list is not in proper YAML format. Please check your file" + raise Exception(msg) # Init variables timestamp = str(strftime("%Y_%m_%d_%H_%M_%S")) @@ -262,7 +259,6 @@ def run_T1w_longitudinal(sublist, cfg): ) -# Run C-PAC subjects via job queue def run( subject_list_file, config_file=None, @@ -274,7 +270,8 @@ debug=False, test_config=False, ) -> int: - """ + """Run C-PAC subjects via job queue. 
+ Returns ------- int @@ -288,6 +285,8 @@ def run( from CPAC.pipeline.cpac_pipeline import run_workflow + WFLOGGER.info("Run called with config file %s", config_file) + if plugin_args is None: plugin_args = {"status_callback": log_nodes_cb} @@ -311,6 +310,7 @@ def run( (file_paths, config) = collect_bids_files_configs(subject_list_file, None) sublist = bids_gen_cpac_sublist(subject_list_file, file_paths, config, None) if not sublist: + WFLOGGER.error("Did not find data in %s", subject_list_file) return 1 # take date+time stamp for run identification purposes @@ -330,26 +330,33 @@ def run( upgrade_pipeline_to_1_8(config_file) c = Configuration(yaml.safe_load(open(config_file, "r"))) except Exception as e: - raise e + msg = ( + "C-PAC could not upgrade pipeline configuration file " + f"{config_file} to v1.8 syntax" + ) + raise RuntimeError(msg) from e except Exception as e: raise e - except IOError: - raise + except IOError as e: + msg = f"config file {config_file} doesn't exist" + raise FileNotFoundError(msg) from e except yaml.parser.ParserError as e: error_detail = '"%s" at line %d' % (e.problem, e.problem_mark.line) - raise Exception( - "Error parsing config file: {0}\n\n" + msg = ( + f"Error parsing config file: {config_file}\n\n" "Error details:\n" - " {1}" - "\n\n".format(config_file, error_detail) + f" {error_detail}" + "\n\n" ) + raise Exception(msg) except Exception as e: - raise Exception( - "Error parsing config file: {0}\n\n" + msg = ( + f"Error parsing config file: {config_file}\n\n" "Error details:\n" - " {1}" - "\n\n".format(config_file, e) + f" {e}" + "\n\n" ) + raise Exception(msg) c.pipeline_setup["log_directory"]["path"] = os.path.abspath( c.pipeline_setup["log_directory"]["path"] @@ -366,14 +373,16 @@ def run( if num_subs_at_once: if not str(num_subs_at_once).isdigit(): - raise Exception("[!] Value entered for --num_cores not a digit.") + msg = "[!] Value entered for --num_cores not a digit." + raise Exception(msg) c.pipeline_setup["system_config"]["num_participants_at_once"] = int( num_subs_at_once ) # Do some validation if not c.pipeline_setup["working_directory"]["path"]: - raise Exception("Working directory not specified") + msg = "Working directory not specified" + raise Exception(msg) if len(c.pipeline_setup["working_directory"]["path"]) > 70: warnings.warn( @@ -394,7 +403,8 @@ def run( if not sublist: sublist = yaml.safe_load(open(subject_list_file, "r")) except: - raise Exception + msg = "Subject list is not in proper YAML format. Please check your file" + raise FileNotFoundError(msg) # Populate subject scan map sub_scan_map = {} @@ -415,8 +425,12 @@ def run( scan_ids.append("scan_" + str(id)) sub_scan_map[s] = scan_ids - except: - raise Exception + except Exception as e: + msg = ( + "\n\nERROR: Subject list file not in proper format - check if you loaded" + " the correct file?\nError name: cpac_runner_0001\n\n" + ) + raise ValueError(msg) from e pipeline_timing_info = [] pipeline_timing_info.append(unique_pipeline_id) @@ -430,7 +444,7 @@ def run( participants=len(sublist), ) except: - pass + WFLOGGER.error("Usage tracking failed for this run.") # If we're running on cluster, execute job scheduler if c.pipeline_setup["system_config"]["on_grid"]["run"]: @@ -621,7 +635,7 @@ def replace_index(target1, target2, file_path): pass yaml.dump(sublist, open(os.path.join(c.pipeline_setup['working_directory']['path'],'data_config_longitudinal.yml'), 'w'), default_flow_style=False) - print('\n\n' + 'Longitudinal pipeline completed.' 
+ '\n\n') + WFLOGGER.info("\n\nLongitudinal pipeline completed.\n\n") # skip main preprocessing if ( @@ -725,6 +739,7 @@ def replace_index(target1, target2, file_path): for job in job_queue: # If the job is not alive if not job.is_alive(): + WFLOGGER.warning("found dead job %s", job) # Find job and delete it from queue loc = job_queue.index(job) del job_queue[loc] diff --git a/CPAC/pipeline/engine.py b/CPAC/pipeline/engine.py index 8a1eb79454..a69db50825 100644 --- a/CPAC/pipeline/engine.py +++ b/CPAC/pipeline/engine.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021-2023 C-PAC Developers +# Copyright (C) 2021-2024 C-PAC Developers # This file is part of C-PAC. @@ -53,7 +53,12 @@ ) from CPAC.utils.interfaces.datasink import DataSink from CPAC.utils.interfaces.function import Function -from CPAC.utils.monitoring import LOGTAIL, WARNING_FREESURFER_OFF_WITH_DATA, getLogger +from CPAC.utils.monitoring import ( + getLogger, + LOGTAIL, + WARNING_FREESURFER_OFF_WITH_DATA, + WFLOGGER, +) from CPAC.utils.outputs import Outputs from CPAC.utils.typing import LIST_OR_STR, TUPLE from CPAC.utils.utils import ( @@ -64,8 +69,6 @@ write_output_json, ) -logger = getLogger("nipype.workflow") - class ResourcePool: def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None): @@ -244,6 +247,9 @@ def get_strat_info(self, prov, label=None, logdir=None): if label: if not logdir: logdir = self.logdir + WFLOGGER.info( + "\n\nPrinting out strategy info for %s in %s\n", label, logdir + ) write_output_json( strat_info, f"{label}_strat_info", indent=4, basedir=logdir ) @@ -251,11 +257,12 @@ def get_strat_info(self, prov, label=None, logdir=None): def set_json_info(self, resource, pipe_idx, key, val): # TODO: actually should probably be able to inititialize resource/pipe_idx if pipe_idx not in self.rpool[resource]: - raise Exception( + msg = ( "\n[!] DEV: The pipeline/strat ID does not exist " f"in the resource pool.\nResource: {resource}" f"Pipe idx: {pipe_idx}\nKey: {key}\nVal: {val}\n" ) + raise Exception(msg) else: if "json" not in self.rpool[resource][pipe_idx]: self.rpool[resource][pipe_idx]["json"] = {} @@ -336,11 +343,12 @@ def set_data( try: res, new_pipe_idx = self.generate_prov_string(new_prov_list) except IndexError: - raise IndexError( + msg = ( f"\n\nThe set_data() call for {resource} has no " "provenance information and should not be an " "injection." ) + raise IndexError(msg) if not json_info: json_info = { "RawSources": [resource] @@ -414,7 +422,7 @@ def get( if report_fetched: return (None, None) return None - raise LookupError( + msg = ( "\n\n[!] C-PAC says: None of the listed resources are in " f"the resource pool:\n\n {resource}\n\nOptions:\n- You " "can enable a node block earlier in the pipeline which " @@ -428,6 +436,7 @@ def get( "through any of our support channels at: " "https://fcp-indi.github.io/\n" ) + raise LookupError(msg) def get_data( self, resource, pipe_idx=None, report_fetched=False, quick_single=False @@ -451,7 +460,8 @@ def copy_resource(self, resource, new_name): try: self.rpool[new_name] = self.rpool[resource] except KeyError: - raise Exception(f"[!] {resource} not in the resource pool.") + msg = f"[!] {resource} not in the resource pool." + raise Exception(msg) def update_resource(self, resource, new_name): # move over any new pipe_idx's @@ -474,11 +484,12 @@ def get_json(self, resource, strat=None): if "json" in resource_strat_dct: strat_json = resource_strat_dct["json"] else: - raise Exception( + msg = ( "\n[!] 
Developer info: the JSON " f"information for {resource} and {strat} " f"is incomplete.\n" ) + raise Exception(msg) return strat_json def get_cpac_provenance(self, resource, strat=None): @@ -499,10 +510,11 @@ def generate_prov_string(prov): # MULTIPLE PRECEDING RESOURCES (or single, if just one) # NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation) if not isinstance(prov, list): - raise Exception( + msg = ( "\n[!] Developer info: the CpacProvenance " f"entry for {prov} has to be a list.\n" ) + raise Exception(msg) last_entry = get_last_prov_entry(prov) resource = last_entry.split(":")[0] return (resource, str(prov)) @@ -510,10 +522,11 @@ def generate_prov_string(prov): @staticmethod def generate_prov_list(prov_str): if not isinstance(prov_str, str): - raise Exception( + msg = ( "\n[!] Developer info: the CpacProvenance " f"entry for {prov_str!s} has to be a string.\n" ) + raise Exception(msg) return ast.literal_eval(prov_str) @staticmethod @@ -559,7 +572,7 @@ def get_strats(self, resources, debug=False): linked_resources = [] resource_list = [] if debug: - verbose_logger = getLogger("engine") + verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("\nresources: %s", resources) for resource in resources: # grab the linked-input tuples @@ -583,7 +596,7 @@ def get_strats(self, resources, debug=False): variant_pool = {} len_inputs = len(resource_list) if debug: - verbose_logger = getLogger("engine") + verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("linked_resources: %s", linked_resources) verbose_logger.debug("resource_list: %s", resource_list) for resource in resource_list: @@ -611,7 +624,7 @@ def get_strats(self, resources, debug=False): variant_pool[fetched_resource].append(f"NO-{val[0]}") if debug: - verbose_logger = getLogger("engine") + verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool) total_pool.append(sub_pool) @@ -649,7 +662,7 @@ def get_strats(self, resources, debug=False): strat_list_list.append(strat_list) if debug: - verbose_logger = getLogger("engine") + verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list)) for strat_list in strat_list_list: json_dct = {} @@ -1342,7 +1355,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): try: wf.connect(node, out, nii_name, "in_file") except OSError as os_error: - logger.warning(os_error) + WFLOGGER.warning(os_error) continue write_json_imports = ["import os", "import json"] @@ -1421,7 +1434,8 @@ def __init__(self, node_block_functions, debug=False): if hasattr(node_block_function, "__name__") else str(node_block_function) ) - raise TypeError(f'Object is not a nodeblock: "{obj_str}"') + msg = f'Object is not a nodeblock: "{obj_str}"' + raise TypeError(msg) name = node_block_function.name self.name = name @@ -1456,11 +1470,11 @@ def __init__(self, node_block_functions, debug=False): if node_block_function.outputs is not None: self.options = node_block_function.outputs - logger.info("Connecting %s...", name) + WFLOGGER.info("Connecting %s...", name) if debug: config.update_config({"logging": {"workflow_level": "DEBUG"}}) logging.update_logging(config) - logger.debug( + WFLOGGER.debug( '"inputs": %s\n\t "outputs": %s%s', node_block_function.inputs, list(self.outputs.keys()), @@ -1481,11 +1495,12 @@ def check_null(self, val): def check_output(self, outputs, label, name): if label not in outputs: - raise NameError( + msg = ( f'\n[!] 
Output name "{label}" in the block ' "function does not match the outputs list " f'{outputs} in Node Block "{name}"\n' ) + raise NameError(msg) def grab_tiered_dct(self, cfg, key_list): cfg_dct = cfg @@ -1521,7 +1536,8 @@ def connect_block(self, wf, cfg, rpool): ): # <---- goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list opts.append(option) except AttributeError as err: - raise Exception(f"{err}\nNode Block: {name}") + msg = f"{err}\nNode Block: {name}" + raise Exception(msg) if opts is None: opts = [opts] @@ -1529,7 +1545,7 @@ def connect_block(self, wf, cfg, rpool): elif option_key and not option_val: # enables multiple config forking entries if not isinstance(option_key[0], list): - raise Exception( + msg = ( f"[!] The option_key field ({option_key}) " f"for {name} exists but there is no " "option_val.\n\nIf you are trying to " @@ -1537,6 +1553,7 @@ def connect_block(self, wf, cfg, rpool): "option_val field must contain a list of " "a list.\n" ) + raise Exception(msg) for option_config in option_key: # option_config is a list of pipe config levels down to the option if config: @@ -1611,11 +1628,12 @@ def connect_block(self, wf, cfg, rpool): try: key_list = config + switch except TypeError: - raise Exception( + msg = ( "\n\n[!] Developer info: Docstring error " f"for {name}, make sure the 'config' or " "'switch' fields are lists.\n\n" ) + raise Exception(msg) switch = self.grab_tiered_dct(cfg, key_list) else: if isinstance(switch[0], list): @@ -1670,12 +1688,12 @@ def connect_block(self, wf, cfg, rpool): try: wf, outs = block_function(wf, cfg, strat_pool, pipe_x, opt) except IOError as e: # duplicate node - logger.warning(e) + WFLOGGER.warning(e) continue if not outs: - if block_function.__name__ == "freesurfer_" "postproc": - logger.warning(WARNING_FREESURFER_OFF_WITH_DATA) + if block_function.__name__ == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) LOGTAIL["warnings"].append( WARNING_FREESURFER_OFF_WITH_DATA ) @@ -1687,7 +1705,7 @@ def connect_block(self, wf, cfg, rpool): node_name = f'{node_name}_{opt["Name"]}' if debug: - verbose_logger = getLogger("engine") + verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("\n=======================") verbose_logger.debug("Node name: %s", node_name) prov_dct = rpool.get_resource_strats_from_prov( @@ -1890,6 +1908,7 @@ def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): if "anat" not in data_paths: + WFLOGGER.warning("No anatomical data present.") return rpool if "creds_path" not in data_paths: @@ -1934,6 +1953,7 @@ def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id def ingress_freesurfer(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): if "anat" not in data_paths: + WFLOGGER.warning("No FreeSurfer data present.") return rpool if "freesurfer_dir" in data_paths["anat"]: @@ -2038,7 +2058,7 @@ def ingress_raw_func_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id # pylint: disable=protected-access wf._local_func_scans = local_func_scans if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("engine") + verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("local_func_scans: %s", local_func_scans) del local_func_scans @@ -2050,6 +2070,8 @@ def ingress_output_dir( ): dir_path = data_paths["derivatives_dir"] + WFLOGGER.info("\nPulling outputs from 
%s.\n", dir_path) + anat = os.path.join(dir_path, "anat") func = os.path.join(dir_path, "func") @@ -2091,11 +2113,12 @@ def ingress_output_dir( data_label = filename.split(unique_id)[1].lstrip("_") if len(filename) == len(data_label): - raise Exception( + msg = ( "\n\n[!] Possibly wrong participant or " "session in this directory?\n\n" f"Filepath: {filepath}\n\n" ) + raise Exception(msg) bidstag = "" for tag in data_label.split("_"): @@ -2201,6 +2224,9 @@ def json_outdir_ingress(rpool, filepath, exts, data_label, json): jsonpath = f"{jsonpath}.json" if not os.path.exists(jsonpath): + WFLOGGER.info( + "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", filepath, jsonpath + ) json_info = { "Description": "This data was generated elsewhere and " "supplied by the user into this C-PAC run's " @@ -2228,13 +2254,14 @@ def json_outdir_ingress(rpool, filepath, exts, data_label, json): if only_desc[-1] == "-": only_desc = only_desc.rstrip("-") else: - raise Exception( + msg = ( "\n[!] Something went wrong with either " "reading in the output directory or when " "it was written out previously.\n\nGive " "this to your friendly local C-PAC " f"developer:\n\n{data_label!s}\n" ) + raise Exception(msg) # remove the integer at the end of the desc-* variant, we will # get the unique pipe_idx from the CpacProvenance below @@ -2654,11 +2681,11 @@ def run_node_blocks(blocks, data_paths, cfg=None): run_blocks = [] if rpool.check_rpool("desc-preproc_T1w"): - pass + WFLOGGER.info("Preprocessed T1w found, skipping anatomical preprocessing.") else: run_blocks += blocks[0] if rpool.check_rpool("desc-preproc_bold"): - pass + WFLOGGER.info("Preprocessed BOLD found, skipping functional preprocessing.") else: run_blocks += blocks[1] diff --git a/CPAC/pipeline/nipype_pipeline_engine/__init__.py b/CPAC/pipeline/nipype_pipeline_engine/__init__.py index fef097b47b..b7ca44ec47 100644 --- a/CPAC/pipeline/nipype_pipeline_engine/__init__.py +++ b/CPAC/pipeline/nipype_pipeline_engine/__init__.py @@ -28,12 +28,12 @@ # import our DEFAULT_MEM_GB and override Node, MapNode from .engine import ( DEFAULT_MEM_GB, - UNDEFINED_SIZE, + export_graph, + get_data_size, MapNode, Node, + UNDEFINED_SIZE, Workflow, - export_graph, - get_data_size, ) __all__ = [interface for interface in dir(pe) if not interface.startswith("_")] + [ diff --git a/CPAC/pipeline/nipype_pipeline_engine/engine.py b/CPAC/pipeline/nipype_pipeline_engine/engine.py index e93aacd7a3..80551e5b13 100644 --- a/CPAC/pipeline/nipype_pipeline_engine/engine.py +++ b/CPAC/pipeline/nipype_pipeline_engine/engine.py @@ -71,14 +71,12 @@ from nipype.utils.filemanip import fname_presuffix from nipype.utils.functions import getsource -from CPAC.utils.monitoring.custom_logging import getLogger +from CPAC.utils.monitoring import getLogger, WFLOGGER # set global default mem_gb DEFAULT_MEM_GB = 2.0 UNDEFINED_SIZE = (42, 42, 42, 1200) -logger = getLogger("nipype.workflow") - def _check_mem_x_path(mem_x_path): """Function to check if a supplied multiplier path exists. @@ -119,7 +117,8 @@ def _doctest_skiplines(docstring, lines_to_skip): 'skip this line # doctest: +SKIP' """ if not isinstance(lines_to_skip, set) and not isinstance(lines_to_skip, list): - raise TypeError("_doctest_skiplines: `lines_to_skip` must be a set or list.") + msg = "_doctest_skiplines: `lines_to_skip` must be a set or list." 
+ raise TypeError(msg) return "\n".join( [ @@ -157,7 +156,7 @@ def __init__(self, *args, mem_gb=DEFAULT_MEM_GB, **kwargs): from CPAC.pipeline.random_state import random_seed super().__init__(*args, mem_gb=mem_gb, **kwargs) - self.logger = getLogger("nipype.workflow") + self.logger = WFLOGGER self.seed = random_seed() self.seed_applied = False self.input_data_shape = Undefined @@ -400,9 +399,8 @@ def mem_gb(self): try: mem_x_path = getattr(self.inputs, self._mem_x["file"]) except AttributeError as attribute_error: - raise AttributeError( - f"{attribute_error.args[0]} in Node '{self.name}'" - ) from attribute_error + msg = f"{attribute_error.args[0]} in Node '{self.name}'" + raise AttributeError(msg) from attribute_error if _check_mem_x_path(mem_x_path): # constant + mem_x[0] * t return self._apply_mem_x() @@ -500,7 +498,7 @@ def __init__(self, name, base_dir=None, debug=False): super().__init__(name, base_dir) self._debug = debug - self.verbose_logger = getLogger("engine") if debug else None + self.verbose_logger = getLogger("CPAC.engine") if debug else None self._graph = nx.DiGraph() self._nodes_cache = set() @@ -630,7 +628,7 @@ def _get_dot( subnodename = subnodefullname.replace(".", "_") for _ in self._graph.get_edge_data(node, subnode)["connect"]: dotlist.append(f'"{nodename}" -> "{subnodename}";') - logger.debug("connection: %s", dotlist[-1]) + WFLOGGER.debug("connection: %s", dotlist[-1]) # add between workflow connections for u, v, d in self._graph.edges(data=True): uname = ".".join([*hierarchy, u.fullname]) @@ -655,7 +653,7 @@ def _get_dot( f'"{uname1.replace(".", "_")}" -> ' f'"{vname1.replace(".", "_")}";' ) - logger.debug("cross connection: %s", dotlist[-1]) + WFLOGGER.debug("cross connection: %s", dotlist[-1]) return ("\n" + prefix).join(dotlist) def _handle_just_in_time_exception(self, node): @@ -689,10 +687,11 @@ def write_graph( os.makedirs(base_dir, exist_ok=True) if graph2use in ["hierarchical", "colored"]: if self.name[:1].isdigit(): # these graphs break if int - raise ValueError( + msg = ( f"{graph2use} graph failed, workflow name " "cannot begin with a number" ) + raise ValueError(msg) dotfilename = os.path.join(base_dir, dotfilename) self.write_hierarchical_dotfile( dotfilename=dotfilename, @@ -714,8 +713,8 @@ def write_graph( simple_form=simple_form, ) - logger.info( - "Generated workflow graph: %s " "(graph2use=%s, simple_form=%s).", + WFLOGGER.info( + "Generated workflow graph: %s (graph2use=%s, simple_form=%s).", outfname, graph2use, simple_form, @@ -739,7 +738,7 @@ def write_hierarchical_dotfile( fp.writelines(dotstr) fp.close() else: - logger.info(dotstr) + WFLOGGER.info(dotstr) def get_data_size(filepath, mode="xyzt"): @@ -809,9 +808,9 @@ def export_graph( graph = deepcopy(graph_in) if use_execgraph: graph = generate_expanded_graph(graph) - logger.debug("using execgraph") + WFLOGGER.debug("using execgraph") else: - logger.debug("using input graph") + WFLOGGER.debug("using input graph") if base_dir is None: base_dir = os.getcwd() @@ -824,7 +823,7 @@ def export_graph( # Convert .dot if format != 'dot' outfname, res = _run_dot(out_dot, format_ext=format) if res is not None and res.runtime.returncode: - logger.warning("dot2png: %s", res.runtime.stderr) + WFLOGGER.warning("dot2png: %s", res.runtime.stderr) pklgraph = _create_dot_graph(graph, show_connectinfo, simple_form) simple_dot = fname_presuffix( @@ -835,7 +834,7 @@ def export_graph( # Convert .dot if format != 'dot' simplefname, res = _run_dot(simple_dot, format_ext=format) if res is not None and 
res.runtime.returncode: - logger.warning("dot2png: %s", res.runtime.stderr) + WFLOGGER.warning("dot2png: %s", res.runtime.stderr) if show: pos = nx.graphviz_layout(pklgraph, prog="dot") diff --git a/CPAC/pipeline/nipype_pipeline_engine/monkeypatch.py b/CPAC/pipeline/nipype_pipeline_engine/monkeypatch.py index cd0402ca46..f367a2d77e 100644 --- a/CPAC/pipeline/nipype_pipeline_engine/monkeypatch.py +++ b/CPAC/pipeline/nipype_pipeline_engine/monkeypatch.py @@ -7,11 +7,11 @@ def patch_base_interface(): """ from nipype.interfaces.base.core import ( BaseInterface, - InterfaceResult, - RuntimeContext, config, indirectory, + InterfaceResult, os, + RuntimeContext, str2bool, write_provenance, ) diff --git a/CPAC/pipeline/nipype_pipeline_engine/plugins/cpac_nipype_custom.py b/CPAC/pipeline/nipype_pipeline_engine/plugins/cpac_nipype_custom.py index 2ff9fe9b1c..ec0210584f 100644 --- a/CPAC/pipeline/nipype_pipeline_engine/plugins/cpac_nipype_custom.py +++ b/CPAC/pipeline/nipype_pipeline_engine/plugins/cpac_nipype_custom.py @@ -45,7 +45,7 @@ from numpy import flatnonzero from nipype.pipeline.plugins.multiproc import logger -from CPAC.pipeline.nipype_pipeline_engine import UNDEFINED_SIZE, MapNode +from CPAC.pipeline.nipype_pipeline_engine import MapNode, UNDEFINED_SIZE from CPAC.utils.monitoring import log_nodes_cb diff --git a/CPAC/pipeline/random_state/seed.py b/CPAC/pipeline/random_state/seed.py index 07329aa715..3ce86b17e7 100644 --- a/CPAC/pipeline/random_state/seed.py +++ b/CPAC/pipeline/random_state/seed.py @@ -188,10 +188,11 @@ def set_up_random_state(seed): seed = int(seed) assert 0 < seed <= np.iinfo(np.int32).max except (ValueError, TypeError, AssertionError): - raise ValueError( + msg = ( "Valid random seeds are positive integers up to " f'2147483647, "random", or None, not {seed}' ) + raise ValueError(msg) _seed["seed"] = seed return random_seed() diff --git a/CPAC/pipeline/schema.py b/CPAC/pipeline/schema.py index 93deed34fd..04d9076f82 100644 --- a/CPAC/pipeline/schema.py +++ b/CPAC/pipeline/schema.py @@ -23,8 +23,8 @@ import numpy as np from pathvalidate import sanitize_filename from voluptuous import ( - ALLOW_EXTRA, All, + ALLOW_EXTRA, Any, BooleanInvalid, Capitalize, @@ -89,11 +89,12 @@ def str_to_bool1_1(x): # pylint: disable=invalid-name else x ) if not isinstance(x, (bool, int)): - raise BooleanInvalid( + msg = ( 'Type boolean value was expected, type ' f'{getattr(type(x), "__name__", str(type(x)))} ' f'value\n\n{x}\n\nwas provided' ) + raise BooleanInvalid(msg) return bool(x) @@ -1265,9 +1266,12 @@ def schema(config_dict): "2-nuisance_regression", "space", ] and isinstance(multiple_invalid.errors[0], CoerceInvalid): - raise CoerceInvalid( + msg = ( 'Nusiance regression space is not forkable. Please choose ' - f'only one of {valid_options["space"]}', + f'only one of {valid_options["space"]}' + ) + raise CoerceInvalid( + msg, path=multiple_invalid.path, ) from multiple_invalid raise multiple_invalid @@ -1294,24 +1298,26 @@ def schema(config_dict): ]["space"] != "template" ): - raise ExclusiveInvalid( + msg = ( "``single_step_resampling_from_stc`` requires " "template-space nuisance regression. Either set " "``nuisance_corrections: 2-nuisance_regression: space`` " f"to ``template`` {or_else}" ) + raise ExclusiveInvalid(msg) if any( registration != "ANTS" for registration in partially_validated["registration_workflows"][ "anatomical_registration" ]["registration"]["using"] ): - raise ExclusiveInvalid( + msg = ( "``single_step_resampling_from_stc`` requires " "ANTS registration. 
Either set " "``registration_workflows: anatomical_registration: " f"registration: using`` to ``ANTS`` {or_else}" ) + raise ExclusiveInvalid(msg) except KeyError: pass try: @@ -1339,12 +1345,15 @@ def schema(config_dict): Length(min=1, max=1)(mec["motion_correction"]["using"]) except LengthInvalid: mec_path = ["functional_preproc", "motion_estimates_and_correction"] - raise LengthInvalid( # pylint: disable=raise-missing-from + msg = ( f'If data[{"][".join(map(repr, mec_path))}][\'run\'] is ' # length must be between 1 and # len(valid_options['motion_correction']) once #1935 is # resolved - 'True, length of list must be exactly 1', + 'True, length of list must be exactly 1' + ) + raise LengthInvalid( # pylint: disable=raise-missing-from + msg, path=[*mec_path, "motion_correction", "using"], ) except KeyError: @@ -1359,10 +1368,11 @@ def schema(config_dict): "create_regressors" ] ): - raise ExclusiveInvalid( + msg = ( "[!] Ingress_regressors and create_regressors can't both run! " " Try turning one option off.\n " ) + raise ExclusiveInvalid(msg) except KeyError: pass try: @@ -1379,12 +1389,13 @@ def schema(config_dict): except (ImportError, ModuleNotFoundError, OSError) as error: import site - raise OSError( + msg = ( "U-Net brain extraction requires torch to be installed, " "but the installation path in this container is " "read-only. Please bind a local writable path to " f'"{site.USER_BASE}" in the container to use U-Net.' - ) from error + ) + raise OSError(msg) from error except KeyError: pass return partially_validated diff --git a/CPAC/pipeline/test/sample_data.py b/CPAC/pipeline/test/sample_data.py index e5e1097bb5..e25c8fc0cd 100644 --- a/CPAC/pipeline/test/sample_data.py +++ b/CPAC/pipeline/test/sample_data.py @@ -1,6 +1,6 @@ sub_list = [ { - "anat": "/fake/data/sub-0001/ses-NFB3/anat/" "sub-0001_ses-NFB3_T1w.nii.gz", + "anat": "/fake/data/sub-0001/ses-NFB3/anat/sub-0001_ses-NFB3_T1w.nii.gz", "func": { "MSIT": { "fmap_mag": "/fake/data/sub-0001/ses-NFB3/fmap/" diff --git a/CPAC/pipeline/test/test_cpac_group_runner.py b/CPAC/pipeline/test/test_cpac_group_runner.py index c64307565a..d8a218ca19 100644 --- a/CPAC/pipeline/test/test_cpac_group_runner.py +++ b/CPAC/pipeline/test/test_cpac_group_runner.py @@ -1,6 +1,31 @@ +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+from logging import basicConfig, INFO + +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC.pipeline.test") +basicConfig(format="%(message)s", level=INFO) + + def run_gather_outputs_func(pipeline_out_dir): from CPAC.pipeline import cpac_group_runner as cgr - cgr.gather_outputs( + df_dct = cgr.gather_outputs( pipeline_out_dir, ["functional_to_standard"], None, False, False, get_func=True ) + logger.info(df_dct) diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index 316ffe1a06..c228fc3640 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -10,11 +10,11 @@ load_cpac_pipe_config, ) from CPAC.pipeline.engine import ( - ResourcePool, ingress_pipeconfig_paths, ingress_raw_anat_data, ingress_raw_func_data, initiate_rpool, + ResourcePool, ) from CPAC.utils.bids_utils import create_cpac_data_config diff --git a/CPAC/pipeline/test/test_nipype_pipeline_engine.py b/CPAC/pipeline/test/test_nipype_pipeline_engine.py index 40cdf01b67..4e00278bff 100644 --- a/CPAC/pipeline/test/test_nipype_pipeline_engine.py +++ b/CPAC/pipeline/test/test_nipype_pipeline_engine.py @@ -8,10 +8,10 @@ from CPAC.pipeline.nipype_pipeline_engine import ( DEFAULT_MEM_GB, + get_data_size, MapNode, Node, Workflow, - get_data_size, ) diff --git a/CPAC/pypeer/peer.py b/CPAC/pypeer/peer.py index 97644a8282..9f4d176b8e 100644 --- a/CPAC/pypeer/peer.py +++ b/CPAC/pypeer/peer.py @@ -1,3 +1,19 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import csv import glob import os @@ -5,6 +21,8 @@ import numpy as np import nibabel as nib +from CPAC.utils.monitoring import IFLOGGER + # check if they have PyPEER installed try: import PyPEER @@ -20,11 +38,12 @@ train_model, ) except ImportError: - raise ImportError( + msg = ( "\n\n[!] PyPEER is not installed. Please double-" "check your Python environment and ensure that the " "PyPEER package is available." ) + raise ImportError(msg) def make_pypeer_dir(dirpath): @@ -32,11 +51,12 @@ def make_pypeer_dir(dirpath): if not os.path.isdir(dirpath): os.makedirs(dirpath) except: - raise Exception( + msg = ( "\n\n[!] Could not create the output directory for " "PyPEER. Double-check your permissions?\n\nAttempted " - "directory path:\n{0}\n".format(dirpath) + f"directory path:\n{dirpath}\n" ) + raise Exception(msg) def pypeer_eye_masking(data_path, eye_mask_path): @@ -104,6 +124,13 @@ def prep_for_pypeer( scrub=False, scrub_thresh=None, ): + IFLOGGER.info( + "\n\n=== C-PAC now executing PyPEER for %s. 
===\n\nPEER scans for training" + " model:\n%s\n\nData scans to estimate eye movements for:\n%s\n\n", + sub_id, + peer_scan_names, + data_scan_names, + ) # note these are non-nuisance-regression strategy paths cpac_func_standard_paths = os.path.join( output_dir, "pipeline_*", sub_id, "functional_to_standard", "_scan_*", "*.nii*" @@ -111,11 +138,12 @@ def prep_for_pypeer( func_standard_paths = glob.glob(cpac_func_standard_paths) if not func_standard_paths: - raise Exception( + msg = ( "\n\n[!] Could not find any 'functional_to_standard' " "file paths in your output directory - did your " "C-PAC run complete successfully?\n\n" ) + raise Exception(msg) eye_mask_glob = os.path.join( output_dir, "pipeline_*", sub_id, "template_eye_mask", "*" @@ -123,11 +151,13 @@ def prep_for_pypeer( eye_mask_path = glob.glob(eye_mask_glob)[0] if not os.path.isfile(eye_mask_path): - raise Exception( - "\n\n[!] Could not find the template eye mask " - "file path in your output directory - did your " - "C-PAC run complete successfully?\n\n" + msg = ( + "\n\n[!] Could not find the template eye mask file path in your output" + " directory - did your C-PAC run complete successfully?\n\n" ) + raise FileNotFoundError(msg) + + IFLOGGER.info("Found input files:\n%s\n", func_standard_paths) pypeer_outdir = func_standard_paths[0].split("functional_to_standard")[0] pypeer_outdir = os.path.join(pypeer_outdir, "PyPEER") @@ -141,25 +171,29 @@ def prep_for_pypeer( scan_label = func_path.split("/")[-2].replace("_scan_", "") if scan_label in peer_scan_names or scan_label in data_scan_names: + IFLOGGER.info("Eye-masking and z-score standardizing %s..", scan_label) masked_data = pypeer_eye_masking(func_path, eye_mask_path) data = pypeer_zscore(masked_data) if gsr: + IFLOGGER.info("Global signal regression for %s..", scan_label) data = global_signal_regression(data, eye_mask_path) removed_indices = None if scrub and scan_label in peer_scan_names: + IFLOGGER.info("Motion scrubbing (Power 2012) for %s..", scan_label) fd_path = func_path.replace( "functional_to_standard", "frame_wise_displacement_power" ) fd_path = fd_path.replace(fd_path.split("/")[-1], "FD.1D") if not os.path.isfile(fd_path): - raise Exception( + msg = ( "\n\n[!] Could not find the mean framewise " "displacement 1D file in your C-PAC output " "directory." 
) + raise Exception(msg) removed_indices = motion_scrub(fd_path, scrub_thresh) @@ -170,6 +204,9 @@ def prep_for_pypeer( data_scans[func_path] = [raveled_data, scan_label] for peer_scan_path in peer_scans.keys(): + IFLOGGER.info( + "Training the eye estimation model using:\n%s\n\n", peer_scan_path + ) data = peer_scans[peer_scan_path][0] peername = peer_scans[peer_scan_path][1] removed_indices = peer_scans[peer_scan_path][2] @@ -180,7 +217,7 @@ def prep_for_pypeer( data_for_training, calibration_points_removed, stim_path ) - model_dir = os.path.join(pypeer_outdir, "peer_model-{0}".format(peername)) + model_dir = os.path.join(pypeer_outdir, f"peer_model-{peername}") make_pypeer_dir(model_dir) save_model( @@ -193,6 +230,7 @@ def prep_for_pypeer( ) for data_scan_path in data_scans.keys(): + IFLOGGER.info("Estimating eye movements for:\n%s\n\n", data_scan_path) data = data_scans[data_scan_path][0] name = data_scans[data_scan_path][1] @@ -200,7 +238,7 @@ def prep_for_pypeer( xfix, yfix = predict_fixations(xmodel, ymodel, data) estimate_dir = os.path.join( - pypeer_outdir, "estimations-{0}_model-{1}".format(name, peername) + pypeer_outdir, f"estimations-{name}_model-{peername}" ) make_pypeer_dir(estimate_dir) diff --git a/CPAC/qc/qc.py b/CPAC/qc/qc.py index 2307147fc6..144ad3b0c0 100644 --- a/CPAC/qc/qc.py +++ b/CPAC/qc/qc.py @@ -1,3 +1,19 @@ +# Copyright (C) 2013-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import matplotlib as mpl mpl.use("Agg", force=True) @@ -282,7 +298,7 @@ def qa_montages( function=gen_histogram, as_module=True, ), - name="hist_{0}_{1}".format(measure, num_strat), + name=f"hist_{measure}_{num_strat}", iterfield=["measure_file"], ) else: @@ -293,7 +309,7 @@ def qa_montages( function=gen_histogram, as_module=True, ), - name="hist_{0}_{1}".format(measure, num_strat), + name=f"hist_{measure}_{num_strat}", ) histogram.inputs.measure = measure @@ -313,8 +329,9 @@ def qa_montages( qc_montage_id_s[idx] = "%s_s" % measure qc_hist_id[idx] = "%s_hist" % measure - except Exception: - pass + except Exception as e: + msg = f"[!] Connection of QA montages workflow for {measure} has failed.\n" + raise OSError(msg) from e def create_qc_snr(wf_name="qc_snr"): diff --git a/CPAC/qc/utils.py b/CPAC/qc/utils.py index 818f6d283e..5e04296b00 100644 --- a/CPAC/qc/utils.py +++ b/CPAC/qc/utils.py @@ -1,3 +1,19 @@ +# Copyright (C) 2013-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import subprocess @@ -8,6 +24,8 @@ import pkg_resources as p import nibabel as nib +from CPAC.utils.monitoring import IFLOGGER + mpl.use("Agg") from matplotlib import cm, gridspec as mgs, pyplot as plt from matplotlib.colors import ListedColormap @@ -34,8 +52,12 @@ def generate_qc_pages(qc_dir): try: if not os.path.exists(qc_dir): os.makedirs(qc_dir) - except IOError: - raise IOError + except OSError as os_error: + msg = ( + "\n\n[!] Could not create a directory for the QC dashboard. Please check" f" write permissions.\n\nDirectory attempted:\n {qc_dir}" + ) + raise OSError(msg) from os_error files = [] for root, _, fs in os.walk(qc_dir): @@ -547,6 +569,18 @@ def determine_start_and_end(data, direction, percent): return start, end +def _log_graphing_error(which_montage: str, image_name: str, error: Exception): + IFLOGGER.error( "\n[!] QC Interface: Had a problem with creating the %s montage for %s" "\n\nDetails:%s. This error might occur because of a registration error" " encountered while using ANTs.\nPlease refer to the png image located in your" " working directory for more insight.", which_montage, image_name, error, ) + + def montage_axial(overlay, underlay, png_name, cbar_name): """Draws Montage using overlay on Anatomical brain in Axial Direction, calls make_montage_axial. @@ -669,9 +703,8 @@ def make_montage_axial(overlay, underlay, png_name, cbar_name): break try: im = grid[i].imshow(np.rot90(Y[:, :, zz]), cmap=cm.Greys_r) - except IndexError: - # TODO: send this to the logger instead - pass + except IndexError as index_error: + _log_graphing_error("axial", png_name, index_error) zz += spacing x, y, z = X.shape @@ -709,9 +742,8 @@ def make_montage_axial(overlay, underlay, png_name, cbar_name): vmin=-max_, vmax=max_, ) - except IndexError: - # TODO: send this to the logger instead - pass + except IndexError as index_error: + _log_graphing_error("axial", png_name, index_error) grid[i].axes.get_xaxis().set_visible(False) grid[i].axes.get_yaxis().set_visible(False) @@ -879,9 +911,8 @@ def make_montage_sagittal(overlay, underlay, png_name, cbar_name): try: im = grid[i].imshow(np.rot90(Y[xx, :, :]), cmap=cm.Greys_r) - except IndexError: - # TODO: send this to the logger instead - pass + except IndexError as index_error: + _log_graphing_error("sagittal", png_name, index_error) grid[i].get_xaxis().set_visible(False) grid[i].get_yaxis().set_visible(False) @@ -921,9 +952,8 @@ def make_montage_sagittal(overlay, underlay, png_name, cbar_name): vmin=-max_, vmax=max_, ) - except IndexError: - # TODO: send this to the logger instead - pass + except IndexError as index_error: + _log_graphing_error("sagittal", png_name, index_error) xx += spacing @@ -942,9 +972,13 @@ def make_montage_sagittal(overlay, underlay, png_name, cbar_name): ): cbar.ax.set_yticks(np.linspace(-max_, max_, 8)) - except AttributeError: - # TODO: send this to the logger instead - pass + except AttributeError as attribute_error: + IFLOGGER.error( + "\n[!]
QC Interface: Had a problem with creating the sagittal montage for" + " %s\n\nDetails:%s\n", + png_name, + attribute_error, + ) plt.axis("off") png_name = os.path.join(os.getcwd(), png_name) @@ -1283,7 +1317,7 @@ def make_resample_1mm(file_): new_fname = "".join([remainder, "_1mm", ext]) new_fname = os.path.join(os.getcwd(), os.path.basename(new_fname)) - cmd = " 3dresample -dxyz 1.0 1.0 1.0 -prefix %s " "-inset %s " % (new_fname, file_) + cmd = f" 3dresample -dxyz 1.0 1.0 1.0 -prefix {new_fname} -inset {file_} " subprocess.getoutput(cmd) return new_fname diff --git a/CPAC/qc/xcp.py b/CPAC/qc/xcp.py index ebcaff6020..f31d34e18c 100644 --- a/CPAC/qc/xcp.py +++ b/CPAC/qc/xcp.py @@ -161,10 +161,11 @@ def dvcorr(dvars, fdj): dvars = np.loadtxt(dvars) fdj = np.loadtxt(fdj) if len(dvars) != len(fdj) - 1: - raise ValueError( + msg = ( "len(DVARS) should be 1 less than len(FDJ), but their respective " f"lengths are {len(dvars)} and {len(fdj)}." ) + raise ValueError(msg) return np.corrcoef(dvars, fdj[1:])[0, 1] @@ -456,7 +457,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): Function( input_names=["subject", "scan", "wf_name"], output_names=["subject", "session", "task", "run"], - imports=["from bids.layout import " "parse_file_entities"], + imports=["from bids.layout import parse_file_entities"], function=get_bids_info, as_module=True, ), diff --git a/CPAC/qpp/qpp.py b/CPAC/qpp/qpp.py index 0846b33865..0ae0107bba 100644 --- a/CPAC/qpp/qpp.py +++ b/CPAC/qpp/qpp.py @@ -127,10 +127,11 @@ def detect_qpp( [r["correlation_score"] if r else 0.0 for r in permutation_result] ) if not np.any(correlation_scores): - raise Exception( + msg = ( "C-PAC could not find QPP in your data. " "Please lower your correlation threshold and try again." ) + raise Exception(msg) max_correlation = np.argsort(correlation_scores)[-1] best_template = permutation_result[max_correlation]["template"] diff --git a/CPAC/randomise/randomise.py b/CPAC/randomise/randomise.py index 46a51756f2..8c2351c9f0 100644 --- a/CPAC/randomise/randomise.py +++ b/CPAC/randomise/randomise.py @@ -1,4 +1,21 @@ +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.monitoring import IFLOGGER def select(input_list): @@ -8,9 +25,14 @@ def select(input_list): img = nib.load(i) hdr = img.header if hdr["cal_min"] == 0 and hdr["cal_max"] == 0: - pass + IFLOGGER.warning( + "Warning! 
%s is an empty image because of no positive values in the" + " unpermuted statistic image, and it could not be processed with" + " tfce.", + i, + ) if not hdr["cal_max"] == 0 and hdr["cal_min"] == 0: - pass + return i return i @@ -36,9 +58,7 @@ def prep_randomise_workflow( wf = pe.Workflow(name="randomise_workflow") wf.base_dir = c.work_dir - randomise = pe.Node( - interface=fsl.Randomise(), name="fsl-randomise_{0}".format(model_name) - ) + randomise = pe.Node(interface=fsl.Randomise(), name=f"fsl-randomise_{model_name}") randomise.inputs.base_name = model_name randomise.inputs.in_file = merged_file randomise.inputs.mask = mask_file diff --git a/CPAC/randomise/test_randomise.py b/CPAC/randomise/test_randomise.py index 06feda7672..03d5945ce6 100644 --- a/CPAC/randomise/test_randomise.py +++ b/CPAC/randomise/test_randomise.py @@ -1,3 +1,20 @@ +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Test randomise.""" import os import pytest @@ -12,6 +29,7 @@ "inputs", [["subjects", "design_matrix_file", "contrast_file", "permutations"]] ) def test_run_randomize(inputs, output_dir=None, run=True): + """Test randomize.""" from . import pipeline randomise_workflow = pe.Workflow(name="preproc") @@ -54,11 +72,10 @@ def test_run_randomize(inputs, output_dir=None, run=True): randomise_workflow.connect( t_node, "outputspec.t_corrected_p_files", dataSink, "t_corrected_p_files" ) - if run is True: + if run: plugin_args = {"n_procs": num_of_cores} randomise_workflow.run( plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args ) return None - else: - return randomise_workflow, randomise_workflow.base_dir + return randomise_workflow, randomise_workflow.base_dir diff --git a/CPAC/registration/output_func_to_standard.py b/CPAC/registration/output_func_to_standard.py index 2119798de0..44c8edfb22 100644 --- a/CPAC/registration/output_func_to_standard.py +++ b/CPAC/registration/output_func_to_standard.py @@ -1,3 +1,20 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Transform functional images to template space.""" from nipype.interfaces import ants, c3, fsl from nipype.interfaces.afni import utils as afni_utils import nipype.interfaces.utility as util @@ -27,9 +44,9 @@ def fsl_apply_transform_func_to_mni( func_ts=False, num_cpus=1, ): - """ - Applies previously calculated FSL registration transforms to input - images. This workflow employs the FSL applywarp tool: + """Apply previously calculated FSL registration transforms to input images. + + This workflow employs the FSL applywarp tool: https://fsl.fmrib.ox.ac.uk/fslcourse/lectures/practicals/registration/index.html @@ -94,11 +111,11 @@ def fsl_apply_transform_func_to_mni( # parallelize time series warp application map_node = True - if map_node is True: + if map_node: # func_mni_warp func_mni_warp = pe.MapNode( interface=fsl.ApplyWarp(), - name="func_mni_fsl_warp_{0}_{1:d}".format(output_name, num_strat), + name=f"func_mni_fsl_warp_{output_name}_{num_strat:d}", iterfield=["in_file"], mem_gb=1.5, ) @@ -106,16 +123,16 @@ def fsl_apply_transform_func_to_mni( # func_mni_warp func_mni_warp = pe.Node( interface=fsl.ApplyWarp(), - name="func_mni_fsl_warp_{0}_{1:d}".format(output_name, num_strat), + name=f"func_mni_fsl_warp_{output_name}_{num_strat:d}", ) func_mni_warp.inputs.interp = interpolation_method # parallelize the apply warp, if multiple CPUs, and it's a time series! if int(num_cpus) > 1 and func_ts: - node_id = "{0}_{1:d}".format(output_name, num_strat) + node_id = f"{output_name}_{num_strat:d}" - chunk_imports = ["import nibabel as nb"] + chunk_imports = ["import nibabel as nib"] chunk = pe.Node( Function( input_names=["func_file", "n_cpus"], @@ -168,7 +185,7 @@ def fsl_apply_transform_func_to_mni( if output_name == "functional_to_standard": write_composite_xfm = pe.Node( interface=fsl.ConvertWarp(), - name="combine_fsl_warps_{0}_{1:d}".format(output_name, num_strat), + name=f"combine_fsl_warps_{output_name}_{num_strat:d}", ) workflow.connect(ref_node, ref_out_file, write_composite_xfm, "reference") @@ -187,7 +204,7 @@ def fsl_apply_transform_func_to_mni( if "functional_to_mni_linear_xfm" not in strat: combine_transforms = pe.Node( interface=fsl.ConvertXFM(), - name="combine_fsl_xforms_{0}_{1:d}".format(output_name, num_strat), + name=f"combine_fsl_xforms_{output_name}_{num_strat:d}", ) combine_transforms.inputs.concat_xfm = True @@ -208,7 +225,8 @@ def fsl_apply_transform_func_to_mni( workflow.connect(combine_transforms, outfile, func_mni_warp, "premat") else: - raise ValueError("Could not find flirt or fnirt registration in nodes") + msg = "Could not find flirt or fnirt registration in nodes" + raise ValueError(msg) strat.append_name(func_mni_warp.name) @@ -233,16 +251,11 @@ def ants_apply_warps_func_mni( func_type="non-ica-aroma", num_cpus=1, ): - """ - Applies previously calculated ANTS registration transforms to input - images. This workflow employs the antsApplyTransforms tool: + """Apply previously calculated ANTS registration transforms to input images. - http://stnava.github.io/ANTs/ + This workflow employs the antsApplyTransforms tool: - Parameters - ---------- - name : string, optional - Name of the workflow. + http://stnava.github.io/ANTs/ Returns ------- @@ -318,9 +331,6 @@ def ants_apply_warps_func_mni( Apply the functional-to-structural and structural-to-template warps to the 4D functional time-series to warp it to template space. 
- - Parameters - ---------- """ # if the input is a string, assume that it is resource pool key, # if it is a tuple, assume that it is a node, outfile pair, @@ -341,14 +351,12 @@ def ants_apply_warps_func_mni( # when inverse is enabled, we want to update the name of various # nodes so that we know they were inverted inverse_string = "" - if inverse is True: + if inverse: inverse_string = "_inverse" # make sure that resource pool has some required resources before proceeding if "fsl_mat_as_itk" not in strat and registration_template == "t1": - fsl_reg_2_itk = pe.Node( - c3.C3dAffineTool(), name="fsl_reg_2_itk_{0}".format(num_strat) - ) + fsl_reg_2_itk = pe.Node(c3.C3dAffineTool(), name=f"fsl_reg_2_itk_{num_strat}") fsl_reg_2_itk.inputs.itk_transform = True fsl_reg_2_itk.inputs.fsl2ras = True @@ -371,7 +379,7 @@ def ants_apply_warps_func_mni( function=change_itk_transform_type, imports=itk_imports, ), - name="change_transform_type_{0}".format(num_strat), + name=f"change_transform_type_{num_strat}", ) workflow.connect( @@ -386,9 +394,9 @@ def ants_apply_warps_func_mni( # stack of transforms to be combined to acheive the desired transformation num_transforms = 5 - collect_transforms_key = "collect_transforms{0}".format(inverse_string) + collect_transforms_key = f"collect_transforms{inverse_string}" - if distcor is True and func_type not in "ica-aroma": + if distcor and func_type not in "ica-aroma": num_transforms = 6 collect_transforms_key = "collect_transforms{0}{1}".format( "_distcor", inverse_string @@ -424,8 +432,8 @@ def ants_apply_warps_func_mni( # the resource pool key related to the resource that should be # connected in, and the second element is the input to which it # should be connected - if inverse is True: - if distcor is True and func_type not in "ica-aroma": + if inverse: + if distcor and func_type not in "ica-aroma": # Field file from anatomical nonlinear registration transforms_to_combine = [ ("mni_to_anatomical_nonlinear_xfm", "in6"), @@ -452,7 +460,7 @@ def ants_apply_warps_func_mni( ("fsl_mat_as_itk", "in5"), ] - if distcor is True and func_type not in "ica-aroma": + if distcor and func_type not in "ica-aroma": transforms_to_combine.append(("blip_warp", "in6")) if registration_template == "epi": @@ -483,8 +491,8 @@ def ants_apply_warps_func_mni( # the resource pool key related to the resource that should be # connected in, and the second element is the input to which it # should be connected - if inverse is True: - if distcor is True and func_type not in "ica-aroma": + if inverse: + if distcor and func_type not in "ica-aroma": # Field file from anatomical nonlinear registration transforms_to_combine = [ ("epi_to_func_nonlinear_xfm", "in4"), @@ -522,7 +530,7 @@ def ants_apply_warps_func_mni( ants_transformation_dict[symmetry][transform_key] ] except KeyError: - raise Exception(locals()) + raise KeyError(locals()) workflow.connect(node, out_file, collect_transforms, input_port) # check transform list (if missing any init/rig/affine) and exclude Nonetype @@ -567,7 +575,7 @@ def ants_apply_warps_func_mni( strat.append_name(inverse_transform_flags.name) #### now we add in the apply ants warps node - if int(num_cpus) > 1 and input_image_type == 3: + if int(num_cpus) > 1 and input_image_type == 3: # noqa: PLR2004 # parallelize time series warp application map_node = True @@ -594,7 +602,7 @@ def ants_apply_warps_func_mni( apply_ants_warp.inputs.out_postfix = "_antswarp" apply_ants_warp.interface.num_threads = int(num_ants_cores) - if inverse is True: + if inverse: 
workflow.connect( inverse_transform_flags, "inverse_transform_flags", @@ -673,10 +681,10 @@ def ants_apply_warps_func_mni( # }) # parallelize the apply warp, if multiple CPUs, and it's a time series! - if int(num_cpus) > 1 and input_image_type == 3: + if int(num_cpus) > 1 and input_image_type == 3: # noqa: PLR2004 node_id = f"_{output_name}_{inverse_string}_{registration_template}_{num_strat}" - chunk_imports = ["import nibabel as nb"] + chunk_imports = ["import nibabel as nib"] chunk = pe.Node( Function( input_names=["func_file", "n_cpus"], @@ -739,14 +747,15 @@ def output_func_to_standard( registration_template="t1", func_type="non-ica-aroma", ): + """Apply previously calculated functional-to-standard transforms.""" image_types = ["func_derivative", "func_derivative_multi", "func_4d", "func_mask"] if input_image_type not in image_types: - raise ValueError( - "Input image type {0} should be one of {1}".format( - input_image_type, ", ".join(image_types) - ) + msg = ( + f"Input image type {input_image_type} should be one of" + f" {', '.join(image_types)}" ) + raise ValueError(msg) nodes = strat.get_nodes_names() @@ -822,9 +831,10 @@ def output_func_to_standard( ) else: - raise ValueError( - "Cannot determine whether a ANTS or FSL registration" - "is desired, check your pipeline." + msg = ( + "Cannot determine whether a ANTS or FSL registration is desired, check" + " your pipeline." ) + raise ValueError(msg) return workflow diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index ebd0784d0e..9e6e3c4906 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -15,8 +15,10 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . # pylint: disable=too-many-lines,ungrouped-imports,wrong-import-order +"""Workflows for registration.""" from typing import Optional +from voluptuous import RequiredFieldInvalid from nipype.interfaces import afni, ants, c3, fsl, utility as util from nipype.interfaces.afni import utils as afni_utils @@ -49,12 +51,13 @@ def apply_transform( num_cpus=1, num_ants_cores=1, ): + """Apply transform.""" if not reg_tool: - raise Exception( - "\n[!] Developer info: the 'reg_tool' parameter sent " - f"to the 'apply_transform' node for '{wf_name}' is " - f"empty.\n" + msg = ( + "\n[!] Developer info: the 'reg_tool' parameter sent to the" + f" 'apply_transform' node for '{wf_name}' is empty.\n" ) + raise RequiredFieldInvalid(msg) wf = pe.Workflow(name=wf_name) @@ -130,7 +133,7 @@ def apply_transform( # parallelize the apply warp, if multiple CPUs, and it's a time # series! if int(num_cpus) > 1 and time_series: - chunk_imports = ["import nibabel as nb"] + chunk_imports = ["import nibabel as nib"] chunk = pe.Node( util.Function( input_names=["func_file", "n_chunks", "chunk_size"], @@ -217,7 +220,7 @@ def apply_transform( # parallelize the apply warp, if multiple CPUs, and it's a time # series! 
if int(num_cpus) > 1 and time_series: - chunk_imports = ["import nibabel as nb"] + chunk_imports = ["import nibabel as nib"] chunk = pe.Node( util.Function( input_names=["func_file", "n_chunks", "chunk_size"], @@ -352,19 +355,16 @@ def convert_pedir(pedir, convert="xyz_to_int"): if isinstance(pedir, bytes): pedir = pedir.decode() if not isinstance(pedir, str): - raise Exception( - "\n\nPhase-encoding direction must be a " - "string value.\n\nValue: {0}" - "\n\n".format(pedir) - ) + msg = f"\n\nPhase-encoding direction must be a string value.\n\nValue: {pedir}\n\n" + raise ValueError(msg) if pedir not in conv_dct.keys(): - raise Exception( - "\n\nInvalid phase-encoding direction " "entered: {0}\n\n".format(pedir) - ) + msg = f"\n\nInvalid phase-encoding direction entered: {pedir}\n\n" + raise ValueError(msg) return conv_dct[pedir] def create_fsl_flirt_linear_reg(name="fsl_flirt_linear_reg"): + """Create a FLIRT workflow.""" linear_register = pe.Workflow(name=name) inputspec = pe.Node( @@ -405,9 +405,7 @@ def create_fsl_flirt_linear_reg(name="fsl_flirt_linear_reg"): def create_fsl_fnirt_nonlinear_reg(name="fsl_fnirt_nonlinear_reg"): - """ - Performs non-linear registration of an input file to a reference file - using FSL FNIRT. + """Perform non-linear registration of an input to a reference using FSL FNIRT. Parameters ---------- @@ -516,9 +514,7 @@ def create_fsl_fnirt_nonlinear_reg(name="fsl_fnirt_nonlinear_reg"): def create_fsl_fnirt_nonlinear_reg_nhp(name="fsl_fnirt_nonlinear_reg_nhp"): - """ - Performs non-linear registration of an input file to a reference file - using FSL FNIRT. + """Perform non-linear registration of an input to a reference using FSL FNIRT. Parameters ---------- @@ -681,9 +677,9 @@ def create_fsl_fnirt_nonlinear_reg_nhp(name="fsl_fnirt_nonlinear_reg_nhp"): def create_register_func_to_anat( config, phase_diff_distcor=False, name="register_func_to_anat" ): - """ - Registers a functional scan in native space to anatomical space using a - linear transform and does not include bbregister. + """Register a functional scan in native space to anatomical space... + + ...using a linear transform and does not include bbregister. Parameters ---------- @@ -804,12 +800,9 @@ def create_register_func_to_anat( def create_register_func_to_anat_use_T2(config, name="register_func_to_anat_use_T2"): - # for monkey data - # ref: https://github.com/DCAN-Labs/dcan-macaque-pipeline/blob/master/fMRIVolume/GenericfMRIVolumeProcessingPipeline.sh#L287-L295 - # https://github.com/HechengJin0/dcan-macaque-pipeline/blob/master/fMRIVolume/GenericfMRIVolumeProcessingPipeline.sh#L524-L535 - """ - Registers a functional scan in native space to anatomical space using a - linear transform and does not include bbregister, use T1 and T2 image. + """Register a functional scan in native space to anatomical space... + + ...using a linear transform and does not include bbregister, use T1 and T2 image. 
Parameters ---------- @@ -824,6 +817,10 @@ def create_register_func_to_anat_use_T2(config, name="register_func_to_anat_use_ Notes ----- + for monkey data + ref: https://github.com/DCAN-Labs/dcan-macaque-pipeline/blob/master/fMRIVolume/GenericfMRIVolumeProcessingPipeline.sh#L287-L295 + https://github.com/HechengJin0/dcan-macaque-pipeline/blob/master/fMRIVolume/GenericfMRIVolumeProcessingPipeline.sh#L524-L535 + Workflow Inputs:: inputspec.func : string (nifti file) @@ -962,9 +959,9 @@ def create_register_func_to_anat_use_T2(config, name="register_func_to_anat_use_ def create_bbregister_func_to_anat( phase_diff_distcor=False, name="bbregister_func_to_anat" ): - """ - Registers a functional scan in native space to structural. This is - meant to be used after create_nonlinear_register() has been run and + """Register a functional scan in native space to structural. + + This is meant to be used after create_nonlinear_register() has been run and relies on some of its outputs. Parameters @@ -1109,9 +1106,9 @@ def bbreg_args(bbreg_target): def create_wf_calculate_ants_warp( name="create_wf_calculate_ants_warp", num_threads=1, reg_ants_skull=1 ): - """ - Calculates the nonlinear ANTS registration transform. This workflow - employs the antsRegistration tool: + """Calculate the nonlinear ANTS registration transform. + + This workflow employs the antsRegistration tool: http://stnava.github.io/ANTs/ @@ -1456,6 +1453,7 @@ def create_wf_calculate_ants_warp( def FSL_registration_connector( wf_name, cfg, orig="T1w", opt=None, symmetric=False, template="T1w" ): + """Transform raw data to template with FSL.""" wf = pe.Workflow(name=wf_name) inputNode = pe.Node( @@ -1658,6 +1656,7 @@ def FSL_registration_connector( def ANTs_registration_connector( wf_name, cfg, params, orig="T1w", symmetric=False, template="T1w" ): + """Transform raw data to template with ANTs.""" wf = pe.Workflow(name=wf_name) inputNode = pe.Node( @@ -1688,12 +1687,11 @@ def ANTs_registration_connector( if params is None: err_msg = ( - "\n\n[!] C-PAC says: \nYou have selected ANTs as your " - "anatomical registration method.\n" - "However, no ANTs parameters were specified.\n" - "Please specify ANTs parameters properly and try again." + "\n\n[!] C-PAC says: \nYou have selected ANTs as your" + " anatomical registration method.\nHowever, no ANTs parameters were" + " specified.\nPlease specify ANTs parameters properly and try again." 
) - raise Exception(err_msg) + raise RequiredFieldInvalid(err_msg) ants_reg_anat_mni = create_wf_calculate_ants_warp( f"anat_mni_ants_register{symm}", @@ -2086,6 +2084,7 @@ def ANTs_registration_connector( def bold_to_T1template_xfm_connector( wf_name, cfg, reg_tool, symmetric=False, blip=False ): + """Transform functional to T1w template.""" wf = pe.Workflow(name=wf_name) inputNode = pe.Node( @@ -2286,6 +2285,7 @@ def bold_to_T1template_xfm_connector( }, ) def register_FSL_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Register T1w to template with FSL.""" fsl, outputs = FSL_registration_connector( f"register_{opt}_anat_to_" f"template_{pipe_num}", cfg, orig="T1w", opt=opt ) @@ -2384,6 +2384,7 @@ def register_FSL_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def register_symmetric_FSL_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Register T1w to symmetric template with FSL.""" fsl, outputs = FSL_registration_connector( f"register_{opt}_anat_to_" f"template_symmetric_" f"{pipe_num}", cfg, @@ -2457,9 +2458,7 @@ def register_symmetric_FSL_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=N }, ) def register_FSL_EPI_to_template(wf, cfg, strat_pool, pipe_num, opt=None): - """Directly register the mean functional to an EPI template. No T1w - involved. - """ + """Directly register the mean functional to an EPI template. No T1w involved.""" fsl, outputs = FSL_registration_connector( f"register_{opt}_EPI_to_" f"template_{pipe_num}", cfg, @@ -2596,6 +2595,7 @@ def register_FSL_EPI_to_template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Register T1w to template with ANTs.""" params = cfg.registration_workflows["anatomical_registration"]["registration"][ "ANTs" ]["T1_registration"] @@ -2730,6 +2730,7 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def register_symmetric_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Register T1 to symmetric template with ANTs.""" params = cfg.registration_workflows["anatomical_registration"]["registration"][ "ANTs" ]["T1_registration"] @@ -2819,9 +2820,7 @@ def register_symmetric_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt= }, ) def register_ANTs_EPI_to_template(wf, cfg, strat_pool, pipe_num, opt=None): - """Directly register the mean functional to an EPI template. No T1w - involved. - """ + """Directly register the mean functional to an EPI template. 
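The hunks above and below replace bare `Exception`s with `voluptuous.RequiredFieldInvalid` when a required pipeline-config value is missing. A minimal sketch of that pattern, using the same message the connector builds:

from voluptuous import RequiredFieldInvalid

def require_ants_params(params):
    """Raise if ANTs registration was selected but no parameters were supplied."""
    if params is None:
        msg = (
            "\n\n[!] C-PAC says: \nYou have selected ANTs as your"
            " anatomical registration method.\nHowever, no ANTs parameters were"
            " specified.\nPlease specify ANTs parameters properly and try again."
        )
        raise RequiredFieldInvalid(msg)
    return params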
No T1w involved.""" params = cfg.registration_workflows["functional_registration"]["EPI_registration"][ "ANTs" ]["parameters"] @@ -2901,6 +2900,7 @@ def register_ANTs_EPI_to_template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Overwrite ANTs transforms with FSL transforms.""" xfm_prov = strat_pool.get_cpac_provenance("from-T1w_to-template_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -2964,7 +2964,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None # -mcs: -multicomponent-split, -oo: -output-multiple split_combined_warp = pe.Node( util.Function( - input_names=["input", "output_name"], + input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, ), @@ -2973,13 +2973,16 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None split_combined_warp.inputs.output_name = "e" wf.connect( - ants_apply_warp_t1_to_template, "output_image", split_combined_warp, "input" + ants_apply_warp_t1_to_template, + "output_image", + split_combined_warp, + "input_name", ) # c4d -mcs ${WD}/xfms/ANTs_CombinedInvWarp.nii.gz -oo ${WD}/xfms/e1inv.nii.gz ${WD}/xfms/e2inv.nii.gz ${WD}/xfms/e3inv.nii.gz split_combined_inv_warp = pe.Node( util.Function( - input_names=["input", "output_name"], + input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, ), @@ -2991,7 +2994,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None ants_apply_warp_template_to_t1, "output_image", split_combined_inv_warp, - "input", + "input_name", ) # fslmaths ${WD}/xfms/e2.nii.gz -mul -1 ${WD}/xfms/e-2.nii.gz @@ -3137,6 +3140,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None outputs=["sbref"], ) def coregistration_prep_vol(wf, cfg, strat_pool, pipe_num, opt=None): + """Create single-band reference for coreg by selecting a functional volume.""" get_func_volume = pe.Node(interface=afni.Calc(), name=f"get_func_volume_{pipe_num}") get_func_volume.inputs.set( @@ -3180,6 +3184,7 @@ def coregistration_prep_vol(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["sbref"], ) def coregistration_prep_mean(wf, cfg, strat_pool, pipe_num, opt=None): + """Create single-band reference for coregistration from mean BOLD.""" coreg_input = strat_pool.get_data("desc-mean_bold") # TODO add mean skull @@ -3220,6 +3225,7 @@ def coregistration_prep_mean(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["sbref"], ) def coregistration_prep_fmriprep(wf, cfg, strat_pool, pipe_num, opt=None): + """Generate fMRIPrep-style single-band reference for coregistration.""" coreg_input = strat_pool.get_data("desc-ref_bold") outputs = {"sbref": coreg_input} @@ -3260,6 +3266,7 @@ def coregistration_prep_fmriprep(wf, cfg, strat_pool, pipe_num, opt=None): ], ) def coregistration(wf, cfg, strat_pool, pipe_num, opt=None): + """Coregister BOLD to T1w.""" diff_complete = False if strat_pool.check_rpool("despiked-fieldmap") and strat_pool.check_rpool( "fieldmap-mask" @@ -3489,8 +3496,9 @@ def coregistration(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def create_func_to_T1template_xfm(wf, cfg, strat_pool, pipe_num, opt=None): - """Condense the BOLD-to-T1 coregistration transform and the T1-to-template - transform into one transform matrix. + """Create a single transform from BOLD-to-T1 coregistration and T1-to-template. 
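The rename from `input` to `input_name` above avoids shadowing Python's built-in `input`. For reference, a hedged sketch of the c4d multicomponent split those nodes drive (`c4d -mcs ... -oo ...`), written with subprocess rather than os.system; the example filename is a placeholder.

import os
import subprocess

def split_multicomponent_warp(input_name, output_name="e"):
    """Split a 3-component ANTs warp into three scalar volumes with c4d."""
    outputs = [os.path.join(os.getcwd(), f"{output_name}{i}.nii.gz") for i in (1, 2, 3)]
    subprocess.check_call(["c4d", "-mcs", input_name, "-oo", *outputs])
    return tuple(outputs)

# e.g. split_multicomponent_warp("ANTs_CombinedWarp.nii.gz")  # placeholder filename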
+ + Condense the BOLD-to-T1 coregistration transform and the T1-to-template transform into one transform matrix. """ xfm_prov = strat_pool.get_cpac_provenance("from-T1w_to-template_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -3566,8 +3574,10 @@ def create_func_to_T1template_xfm(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def create_func_to_T1template_symmetric_xfm(wf, cfg, strat_pool, pipe_num, opt=None): - """Condense the BOLD-to-T1 coregistration transform and the T1-to- - symmetric-template transform into one transform matrix. + """Create a single transform from coregistration and T1-to-symmetric-template. + + Condense the BOLD-to-T1 coregistration transform and the T1-to-symmetric-template + transform into one transform matrix. """ xfm_prov = strat_pool.get_cpac_provenance("from-T1w_to-symtemplate_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -3636,6 +3646,7 @@ def create_func_to_T1template_symmetric_xfm(wf, cfg, strat_pool, pipe_num, opt=N outputs=["sbref", "desc-preproc_bold", "desc-stc_bold", "bold"], ) def apply_phasediff_to_timeseries_separately(wf, cfg, strat_pool, pipe_num, opt=None): + """Apply phasediff to timeseries.""" outputs = {"desc-preproc_bold": strat_pool.get_data("desc-preproc_bold")} if not strat_pool.check_rpool("despiked-fieldmap"): return (wf, outputs) @@ -3773,6 +3784,7 @@ def apply_phasediff_to_timeseries_separately(wf, cfg, strat_pool, pipe_num, opt= outputs=["desc-preproc_bold", "desc-stc_bold", "bold"], ) def apply_blip_to_timeseries_separately(wf, cfg, strat_pool, pipe_num, opt=None): + """Apply blip to timeseries.""" xfm_prov = strat_pool.get_cpac_provenance("from-bold_to-template_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -3852,6 +3864,7 @@ def apply_blip_to_timeseries_separately(wf, cfg, strat_pool, pipe_num, opt=None) outputs={"space-template_desc-head_T1w": {"Template": "T1w-template"}}, ) def warp_wholeheadT1_to_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp T1 head to template.""" xfm_prov = strat_pool.get_cpac_provenance("from-T1w_to-template_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -3905,6 +3918,7 @@ def warp_wholeheadT1_to_template(wf, cfg, strat_pool, pipe_num, opt=None): outputs={"space-template_desc-brain_mask": {"Template": "T1w-template"}}, ) def warp_T1mask_to_template(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp T1 mask to template.""" xfm_prov = strat_pool.get_cpac_provenance("from-T1w_to-template_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -3965,6 +3979,7 @@ def warp_T1mask_to_template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_timeseries_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp timeseries to T1 template.""" xfm_prov = strat_pool.get_cpac_provenance("from-bold_to-template_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -4027,6 +4042,7 @@ def warp_timeseries_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_timeseries_to_T1template_deriv(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp timeseries to T1 template at derivative resolution.""" xfm_prov = strat_pool.get_cpac_provenance("from-bold_to-template_mode-image_xfm") reg_tool = check_prov_for_regtool(xfm_prov) @@ -4099,11 +4115,15 @@ def warp_timeseries_to_T1template_deriv(wf, cfg, strat_pool, pipe_num, opt=None) }, ) def warp_timeseries_to_T1template_abcd(wf, cfg, strat_pool, pipe_num, opt=None): - # Apply motion correction, coreg, anat-to-template transforms on raw functional timeseries 
using ABCD-style registration - # Ref: https://github.com/DCAN-Labs/DCAN-HCP/blob/master/fMRIVolume/scripts/OneStepResampling.sh#L168-L197 + """Apply motion correction, coreg, anat-to-template transforms... - # https://github.com/DCAN-Labs/DCAN-HCP/blob/master/fMRIVolume/scripts/DistortionCorrectionAndEPIToT1wReg_FLIRTBBRAndFreeSurferBBRbased.sh#L548 - # convertwarp --relout --rel -m ${WD}/fMRI2str.mat --ref=${T1wImage} --out=${WD}/fMRI2str.nii.gz + ...on raw functional timeseries using ABCD-style registration. + + Ref: https://github.com/DCAN-Labs/DCAN-HCP/blob/master/fMRIVolume/scripts/OneStepResampling.sh#L168-L197 + + https://github.com/DCAN-Labs/DCAN-HCP/blob/master/fMRIVolume/scripts/DistortionCorrectionAndEPIToT1wReg_FLIRTBBRAndFreeSurferBBRbased.sh#L548 + convertwarp --relout --rel -m ${WD}/fMRI2str.mat --ref=${T1wImage} --out=${WD}/fMRI2str.nii.gz + """ convert_func_to_anat_linear_warp = pe.Node( interface=fsl.ConvertWarp(), name=f"convert_func_to_anat_linear_warp_{pipe_num}" ) @@ -4409,11 +4429,16 @@ def warp_timeseries_to_T1template_abcd(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_timeseries_to_T1template_dcan_nhp(wf, cfg, strat_pool, pipe_num, opt=None): - # Apply motion correction, coreg, anat-to-template transforms on raw functional timeseries - # Ref: https://github.com/DCAN-Labs/dcan-macaque-pipeline/blob/master/fMRIVolume/scripts/OneStepResampling.sh + """ + Apply motion correction, coreg, anat-to-template transforms... - # https://github.com/DCAN-Labs/dcan-macaque-pipeline/blob/master/fMRIVolume/scripts/OneStepResampling.sh#L131 - # ${FSLDIR}/bin/flirt -interp spline -in ${T1wImage} -ref ${T1wImage} -applyisoxfm $FinalfMRIResolution -out ${WD}/${T1wImageFile}.${FinalfMRIResolution} + ...on raw functional timeseries. + + Ref: https://github.com/DCAN-Labs/dcan-macaque-pipeline/blob/master/fMRIVolume/scripts/OneStepResampling.sh + + https://github.com/DCAN-Labs/dcan-macaque-pipeline/blob/master/fMRIVolume/scripts/OneStepResampling.sh#L131 + ${FSLDIR}/bin/flirt -interp spline -in ${T1wImage} -ref ${T1wImage} -applyisoxfm $FinalfMRIResolution -out ${WD}/${T1wImageFile}.${FinalfMRIResolution} + """ anat_resample = pe.Node( interface=fsl.FLIRT(), name=f"anat_resample_func_res_{pipe_num}" ) @@ -4753,10 +4778,9 @@ def warp_timeseries_to_T1template_dcan_nhp(wf, cfg, strat_pool, pipe_num, opt=No def single_step_resample_timeseries_to_T1template( wf, cfg, strat_pool, pipe_num, opt=None ): - """ - Apply motion correction, coreg, anat-to-template transforms on - slice-time corrected functional timeseries based on fMRIPrep - pipeline. + """Apply motion correction, coreg, anat-to-template transforms... + + ...on slice-time corrected functional timeseries based on fMRIPrep pipeline. Copyright (c) 2015-2018, the CRN developers team. All rights reserved. 
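The ABCD- and DCAN-style one-step resampling above folds the BOLD-to-anat affine into a single relative warp with FSL `convertwarp` (see the Ref links). A hedged nipype sketch of that call; the filenames are placeholders, and the traits used are the standard `fsl.ConvertWarp` ones.

from nipype.interfaces import fsl

cw = fsl.ConvertWarp()
cw.inputs.reference = "T1w_image.nii.gz"   # --ref  (placeholder)
cw.inputs.premat = "fMRI2str.mat"          # --premat / -m  (placeholder)
cw.inputs.relwarp = True                   # --rel
cw.inputs.out_relwarp = True               # --relout
cw.inputs.out_file = "fMRI2str.nii.gz"     # --out
# cw.run() executes roughly:
# convertwarp --ref=T1w_image.nii.gz --premat=fMRI2str.mat --rel --relout --out=fMRI2str.nii.gz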
@@ -5048,6 +5072,7 @@ def single_step_resample_timeseries_to_T1template( }, ) def warp_sbref_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp single-band reference to T1 template.""" xfm = "from-bold_to-template_mode-image_xfm" wf, apply_xfm = warp_resource_to_template( wf, @@ -5091,6 +5116,7 @@ def warp_sbref_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_bold_mask_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp BOLD mask to T1 template.""" xfm = "from-bold_to-template_mode-image_xfm" wf, apply_xfm = warp_resource_to_template( wf, @@ -5136,8 +5162,9 @@ def warp_bold_mask_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_deriv_mask_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): - """Transform the BOLD mask to template space and to the resolution set for - the derivative outputs. + """Transform the BOLD mask to template space... + + ...and to the resolution set for the derivative outputs. """ xfm = "from-bold_to-template_mode-image_xfm" wf, apply_xfm = warp_resource_to_template( @@ -5174,6 +5201,7 @@ def warp_deriv_mask_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): outputs={"space-template_desc-preproc_bold": {"Template": "EPI-template"}}, ) def warp_timeseries_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp timeseries to EPI template.""" xfm = "from-bold_to-EPItemplate_mode-image_xfm" wf, apply_xfm, resource = warp_resource_to_template( wf, cfg, strat_pool, pipe_num, "desc-preproc_bold", xfm, time_series=True @@ -5197,6 +5225,7 @@ def warp_timeseries_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): outputs={"space-template_desc-mean_bold": {"Template": "EPI-template"}}, ) def warp_bold_mean_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp mean BOLD to EPI template space.""" xfm = "from-bold_to-EPItemplate_mode-image_xfm" wf, apply_xfm = warp_resource_to_template( wf, cfg, strat_pool, pipe_num, "desc-mean_bold", xfm, time_series=False @@ -5220,6 +5249,7 @@ def warp_bold_mean_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): outputs={"space-template_desc-bold_mask": {"Template": "EPI-template"}}, ) def warp_bold_mask_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp BOLD mask to EPI tempalate.""" xfm = "from-bold_to-EPItemplate_mode-image_xfm" wf, apply_xfm = warp_resource_to_template( wf, @@ -5251,8 +5281,9 @@ def warp_bold_mask_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_deriv_mask_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): - """Transform the BOLD mask to template space and to the resolution set for - the derivative outputs. + """Transform the BOLD mask to EPI template space... + + ...and to the resolution set for the derivative outputs. 
""" xfm = "from-bold_to-EPItemplate_mode-image_xfm" wf, apply_xfm = warp_resource_to_template( @@ -5292,12 +5323,13 @@ def warp_deriv_mask_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_tissuemask_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp tissue masks to T1 template.""" return warp_tissuemask_to_template( wf, cfg, strat_pool, pipe_num, - xfm="from-T1w_to-template_mode-image_" "xfm", + xfm="from-T1w_to-template_mode-image_xfm", template_space="T1", ) @@ -5326,18 +5358,19 @@ def warp_tissuemask_to_T1template(wf, cfg, strat_pool, pipe_num, opt=None): }, ) def warp_tissuemask_to_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): + """Warp tissue masks to EPI template.""" return warp_tissuemask_to_template( wf, cfg, strat_pool, pipe_num, - xfm="from-bold_to-EPItemplate_" "mode-image_xfm", + xfm="from-bold_to-EPItemplate_mode-image_xfm", template_space="EPI", ) def warp_tissuemask_to_template(wf, cfg, strat_pool, pipe_num, xfm, template_space): - """Function to apply transforms to tissue masks. + """Apply transforms to tissue masks. Parameters ---------- @@ -5390,7 +5423,7 @@ def warp_resource_to_template( reference: Optional[str] = None, time_series: Optional[bool] = False, ) -> TUPLE[pe.Workflow, pe.Workflow, str]: - """Function to warp a resource into a template space. + """Warp a resource into a template space. Parameters ---------- diff --git a/CPAC/registration/tests/mocks.py b/CPAC/registration/tests/mocks.py index 7fcdf789ef..18501c5a9a 100644 --- a/CPAC/registration/tests/mocks.py +++ b/CPAC/registration/tests/mocks.py @@ -12,7 +12,7 @@ def file_node(path, file_node_num=0): input_node = pe.Node( util.IdentityInterface(fields=["file"]), - name="file_node_{0}".format(file_node_num), + name=f"file_node_{file_node_num}", ) input_node.inputs.file = path return input_node, "file" @@ -99,9 +99,7 @@ def configuration_strategy_mock(method="FSL"): os.path.join( "/scratch", "resting_preproc_sub-M10978008_ses-NFB3_cpac105", - "temporal_dual_regression_0/_scan_test/_selector_CSF-2mmE-M_aC-WM-2mmE-DPC5_G-M_M-SDB_P-2/_spatial_map_PNAS_Smith09_rsn10_spatial_map_file_..cpac_templates..PNAS_Smith09_rsn10.nii.gz/split_raw_volumes/temp_reg_map_000{0}.nii.gz".format( - n - ), + f"temporal_dual_regression_0/_scan_test/_selector_CSF-2mmE-M_aC-WM-2mmE-DPC5_G-M_M-SDB_P-2/_spatial_map_PNAS_Smith09_rsn10_spatial_map_file_..cpac_templates..PNAS_Smith09_rsn10.nii.gz/split_raw_volumes/temp_reg_map_000{n}.nii.gz", ) for n in range(10) ], diff --git a/CPAC/registration/tests/test_ants_apply_warp.py b/CPAC/registration/tests/test_ants_apply_warp.py index 245cfc67a3..74db2df042 100644 --- a/CPAC/registration/tests/test_ants_apply_warp.py +++ b/CPAC/registration/tests/test_ants_apply_warp.py @@ -1,3 +1,19 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
import os import pytest @@ -129,10 +145,8 @@ def test_ants_apply_warps_func_mni_mapnode(): test_name, "apply_ants_warp_dr_tempreg_maps_standard_to_original_mapnode_inverse_0", "mapflow", - "_apply_ants_warp_dr_tempreg_maps_standard_to_original_mapnode_inverse_0{0}".format( - n - ), - "temp_reg_map_000{0}_antswarp_antswarp.nii.gz".format(n), + f"_apply_ants_warp_dr_tempreg_maps_standard_to_original_mapnode_inverse_0{n}", + f"temp_reg_map_000{n}_antswarp_antswarp.nii.gz", ) for n in range(0, 10) ] @@ -270,10 +284,8 @@ def test_ants_apply_warps_func_mni_mapnode_symm(): test_name, "apply_ants_warp_dr_tempreg_maps_standard_symm_to_original_mapnode_inverse_0", "mapflow", - "_apply_ants_warp_dr_tempreg_maps_standard_symm_to_original_mapnode_inverse_0{0}".format( - n - ), - "temp_reg_map_000{0}_antswarp_antswarp.nii.gz".format(n), + f"_apply_ants_warp_dr_tempreg_maps_standard_symm_to_original_mapnode_inverse_0{n}", + f"temp_reg_map_000{n}_antswarp_antswarp.nii.gz", ) for n in range(0, 10) ] @@ -282,7 +294,7 @@ def test_ants_apply_warps_func_mni_mapnode_symm(): test_utils.pearson_correlation(orig_file, xformed_file) for orig_file, xformed_file in zip(dr_spatmaps, dr_spatmaps_after_transform) ] - + print(r) # noqa: T201 test_results = [r_value > 0.93 for r_value in r] assert all(test_results) diff --git a/CPAC/registration/utils.py b/CPAC/registration/utils.py index 5fd7310d57..2f5e7943b4 100644 --- a/CPAC/registration/utils.py +++ b/CPAC/registration/utils.py @@ -1,13 +1,34 @@ +# Copyright (C) 2014-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
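The tests above keep asserting a voxelwise Pearson correlation above 0.93 between the original and warp-round-tripped spatial maps. A minimal sketch of such a check (an assumed helper, not necessarily the `test_utils.pearson_correlation` implementation):

import nibabel as nib
import numpy as np

def pearson_correlation_sketch(file_a, file_b):
    """Correlate two images voxelwise after flattening."""
    a = nib.load(file_a).get_fdata().ravel()
    b = nib.load(file_b).get_fdata().ravel()
    return np.corrcoef(a, b)[0, 1]

# usage: assert pearson_correlation_sketch("orig.nii.gz", "xformed.nii.gz") > 0.93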
+"""Utilities for registration.""" import os +import subprocess import numpy as np +from voluptuous import RequiredFieldInvalid def single_ants_xfm_to_list(transform): + """Convert a single ANTs transform to a list.""" return [transform] def interpolation_string(interpolation, reg_tool): + """Translate interpolation string to reg_tool-specific string.""" if reg_tool == "ants": pass elif reg_tool == "fsl": @@ -20,22 +41,24 @@ def interpolation_string(interpolation, reg_tool): def combine_inputs_into_list(input1, input2, input3): + """Combine inputs into a list.""" return [input1, input2, input3] def seperate_warps_list(warp_list, selection): + """Select the warp from the warp list.""" selected_warp = None for warp in warp_list: if selection == "Warp": if "3Warp" in warp or "2Warp" in warp or "1Warp" in warp: selected_warp = warp - else: - if selection in warp: - selected_warp = warp + elif selection in warp: + selected_warp = warp return selected_warp def check_transforms(transform_list): + """Check if the transform list is empty.""" transform_number = list(filter(None, transform_list)) return [(transform_number[index]) for index in range(len(transform_number))], len( transform_number @@ -43,6 +66,7 @@ def check_transforms(transform_list): def generate_inverse_transform_flags(transform_list): + """List whether each transform has an inverse.""" inverse_transform_flags = [] for transform in transform_list: # check `blip_warp_inverse` file name and rename it @@ -73,6 +97,7 @@ def hardcoded_reg( interp=None, reg_with_skull=0, ): + """Run ANTs registration.""" # TODO: expand transforms to cover all in ANTs para regcmd = ["antsRegistration"] @@ -81,93 +106,82 @@ def hardcoded_reg( if para_type == "dimensionality": if ants_para[para_index][para_type] not in [2, 3, 4]: err_msg = ( - "Dimensionality specified in ANTs parameters: %d, is not supported. " - "Change to 2, 3, or 4 and try again" - % ants_para[para_index][para_type] + "Dimensionality specified in ANTs parameters:" + f" {ants_para[para_index][para_type]}, is not supported." + " Change to 2, 3, or 4 and try again" ) - raise Exception(err_msg) - else: - regcmd.append("--dimensionality") - regcmd.append(str(ants_para[para_index][para_type])) + raise ValueError(err_msg) + regcmd.append("--dimensionality") + regcmd.append(str(ants_para[para_index][para_type])) elif para_type == "verbose": if ants_para[para_index][para_type] not in [0, 1]: err_msg = ( - "Verbose output option in ANTs parameters: %d, is not supported. " - "Change to 0 or 1 and try again" - % ants_para[para_index][para_type] + "Verbose output option in ANTs parameters:" + f" {ants_para[para_index][para_type]}, is not supported." + " Change to 0 or 1 and try again" ) - raise Exception(err_msg) - else: - regcmd.append("--verbose") - regcmd.append(str(ants_para[para_index][para_type])) + raise ValueError(err_msg) + regcmd.append("--verbose") + regcmd.append(str(ants_para[para_index][para_type])) elif para_type == "float": if ants_para[para_index][para_type] not in [0, 1]: err_msg = ( - "Float option in ANTs parameters: %d, is not supported. " - "Change to 0 or 1 and try again" - % ants_para[para_index][para_type] + "Float option in ANTs parameters:" + f" {ants_para[para_index][para_type]}, is not supported." 
+ " Change to 0 or 1 and try again" ) - raise Exception(err_msg) - else: - regcmd.append("--float") - regcmd.append(str(ants_para[para_index][para_type])) + raise ValueError(err_msg) + regcmd.append("--float") + regcmd.append(str(ants_para[para_index][para_type])) elif para_type == "collapse-output-transforms": if ants_para[para_index][para_type] not in [0, 1]: err_msg = ( - "collapse-output-transforms specified in ANTs parameters: %d, is not supported. " - "Change to 0 or 1 and try again" - % ants_para[para_index][para_type] + "collapse-output-transforms specified in ANTs parameters:" + f" {ants_para[para_index][para_type]}, is not supported." + " Change to 0 or 1 and try again" ) - raise Exception(err_msg) - else: - regcmd.append("--collapse-output-transforms") - regcmd.append(str(ants_para[para_index][para_type])) + raise ValueError(err_msg) + regcmd.append("--collapse-output-transforms") + regcmd.append(str(ants_para[para_index][para_type])) elif para_type == "winsorize-image-intensities": if ( ants_para[para_index][para_type]["lowerQuantile"] is None or ants_para[para_index][para_type]["upperQuantile"] is None ): - err_msg = "Please specifiy lowerQuantile and upperQuantile of ANTs parameters --winsorize-image-intensities in pipeline config. " - raise Exception(err_msg) - else: - regcmd.append("--winsorize-image-intensities") - regcmd.append( - "[{0},{1}]".format( - ants_para[para_index][para_type]["lowerQuantile"], - ants_para[para_index][para_type]["upperQuantile"], - ) + err_msg = ( + "Please specifiy lowerQuantile and upperQuantile of ANTs" + " parameters --winsorize-image-intensities in pipeline config." ) + raise RequiredFieldInvalid(err_msg) + regcmd.append("--winsorize-image-intensities") + _quantile = ants_para[para_index][para_type] + regcmd.append( + f"[{_quantile['lowerQuantile']},{_quantile['upperQuantile']}]" + ) elif para_type == "initial-moving-transform": if ants_para[para_index][para_type]["initializationFeature"] is None: - err_msg = "Please specifiy initializationFeature of ANTs parameters in pipeline config. " - raise Exception(err_msg) + err_msg = ( + "Please specifiy initializationFeature of ANTs parameters in" + " pipeline config." + ) + raise RequiredFieldInvalid(err_msg) + regcmd.append("--initial-moving-transform") + initialization_feature = ants_para[para_index][para_type][ + "initializationFeature" + ] + if reg_with_skull == 1: + regcmd.append( + f"[{reference_skull},{moving_skull},{initialization_feature}]" + ) else: - regcmd.append("--initial-moving-transform") - if reg_with_skull == 1: - regcmd.append( - "[{0},{1},{2}]".format( - reference_skull, - moving_skull, - ants_para[para_index][para_type][ - "initializationFeature" - ], - ) - ) - else: - regcmd.append( - "[{0},{1},{2}]".format( - reference_brain, - moving_brain, - ants_para[para_index][para_type][ - "initializationFeature" - ], - ) - ) + regcmd.append( + f"[{reference_brain},{moving_brain},{initialization_feature}]" + ) elif para_type == "transforms": for trans_index in range(len(ants_para[para_index][para_type])): @@ -181,19 +195,14 @@ def hardcoded_reg( is None ): err_msg = ( - "Please specifiy % s Gradient Step of ANTs parameters in pipeline config. " - % trans_type - ) - raise Exception(err_msg) - else: - regcmd.append( - "{0}[{1}]".format( - trans_type, - ants_para[para_index][para_type][trans_index][ - trans_type - ]["gradientStep"], - ) + f"Please specifiy {trans_type} Gradient Step of" + " ANTs parameters in pipeline config." 
) + raise RequiredFieldInvalid(err_msg) + gradient_step = ants_para[para_index][para_type][ + trans_index + ][trans_type]["gradientStep"] + regcmd.append(f"{trans_type}[{gradient_step}]") if trans_type == "SyN": if ( @@ -202,52 +211,44 @@ def hardcoded_reg( ]["gradientStep"] is None ): - err_msg = ( - "Please specifiy % s Gradient Step of ANTs parameters in pipeline config. " - % trans_type + err_msg = f"Please specifiy {trans_type} Gradient Step of ANTs parameters in pipeline config." + raise RequiredFieldInvalid(err_msg) + SyN_para = [] + SyN_para.append( + str( + ants_para[para_index][para_type][trans_index][ + trans_type + ]["gradientStep"] ) - raise Exception(err_msg) - else: - SyN_para = [] + ) + if ( + ants_para[para_index][para_type][trans_index][ + trans_type + ]["updateFieldVarianceInVoxelSpace"] + is not None + ): SyN_para.append( - "{0}".format( + str( ants_para[para_index][para_type][trans_index][ trans_type - ]["gradientStep"] + ]["updateFieldVarianceInVoxelSpace"] ) ) - if ( - ants_para[para_index][para_type][trans_index][ - trans_type - ]["updateFieldVarianceInVoxelSpace"] - is not None - ): - SyN_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type][ - "updateFieldVarianceInVoxelSpace" - ] - ) - ) - if ( - ants_para[para_index][para_type][trans_index][ - trans_type - ]["totalFieldVarianceInVoxelSpace"] - is not None - ): - SyN_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type][ - "totalFieldVarianceInVoxelSpace" - ] - ) + if ( + ants_para[para_index][para_type][trans_index][ + trans_type + ]["totalFieldVarianceInVoxelSpace"] + is not None + ): + SyN_para.append( + str( + ants_para[para_index][para_type][trans_index][ + trans_type + ]["totalFieldVarianceInVoxelSpace"] ) - SyN_para = ",".join([str(elem) for elem in SyN_para]) - regcmd.append("{0}[{1}]".format(trans_type, SyN_para)) + ) + SyN_para = ",".join([str(elem) for elem in SyN_para]) + regcmd.append(f"{trans_type}[{SyN_para}]") if ( ants_para[para_index][para_type][trans_index][trans_type][ @@ -265,67 +266,59 @@ def hardcoded_reg( ]["metric"]["numberOfBins"] is None ): - err_msg = "Please specifiy metricWeight and numberOfBins for metric MI of ANTs parameters in pipeline config." - raise Exception(err_msg) - else: - MI_para = [] + err_msg = ( + "Please specifiy metricWeight and numberOfBins for" + " metric MI of ANTs parameters in pipeline config." 
+ ) + raise RequiredFieldInvalid(err_msg) + MI_para = [] + _metric = ants_para[para_index][para_type][trans_index][ + trans_type + ]["metric"] + MI_para.append( + f"{_metric['metricWeight']},{_metric['numberOfBins']}" + ) + if "samplingStrategy" in ants_para[para_index][para_type][ + trans_index + ][trans_type]["metric"] and ants_para[para_index][ + para_type + ][trans_index][trans_type]["metric"][ + "samplingStrategy" + ] in ["None", "Regular", "Random"]: MI_para.append( - "{0},{1}".format( + str( ants_para[para_index][para_type][trans_index][ trans_type - ]["metric"]["metricWeight"], + ]["metric"]["samplingStrategy"] + ) + ) + if ( + "samplingPercentage" + in ants_para[para_index][para_type][trans_index][ + trans_type + ]["metric"] + and ants_para[para_index][para_type][trans_index][ + trans_type + ]["metric"]["samplingPercentage"] + is not None + ): + MI_para.append( + str( ants_para[para_index][para_type][trans_index][ trans_type - ]["metric"]["numberOfBins"], + ]["metric"]["samplingPercentage"] ) ) - if "samplingStrategy" in ants_para[para_index][ - para_type - ][trans_index][trans_type]["metric"] and ants_para[ - para_index - ][para_type][trans_index][trans_type]["metric"][ - "samplingStrategy" - ] in ["None", "Regular", "Random"]: - MI_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type]["metric"]["samplingStrategy"] - ) - ) - if ( - "samplingPercentage" - in ants_para[para_index][para_type][trans_index][ - trans_type - ]["metric"] - and ants_para[para_index][para_type][trans_index][ - trans_type - ]["metric"]["samplingPercentage"] - is not None - ): - MI_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type]["metric"][ - "samplingPercentage" - ] - ) - ) - MI_para = ",".join([str(elem) for elem in MI_para]) - regcmd.append("--metric") - if reg_with_skull == 1: - regcmd.append( - "MI[{0},{1},{2}]".format( - reference_skull, moving_skull, MI_para - ) - ) - else: - regcmd.append( - "MI[{0},{1},{2}]".format( - reference_brain, moving_brain, MI_para - ) - ) + MI_para = ",".join([str(elem) for elem in MI_para]) + regcmd.append("--metric") + if reg_with_skull == 1: + regcmd.append( + f"MI[{reference_skull},{moving_skull},{MI_para}]" + ) + else: + regcmd.append( + f"MI[{reference_brain},{moving_brain},{MI_para}]" + ) if ( ants_para[para_index][para_type][trans_index][trans_type][ @@ -343,60 +336,54 @@ def hardcoded_reg( ]["metric"]["radius"] is None ): - err_msg = "Please specifiy metricWeight and radius for metric CC of ANTs parameters in pipeline config." - raise Exception(err_msg) - else: - CC_para = [] + err_msg = ( + "Please specifiy metricWeight and radius for metric" + " CC of ANTs parameters in pipeline config." 
+ ) + raise RequiredFieldInvalid(err_msg) + CC_para = [] + _metric = ants_para[para_index][para_type][trans_index][ + trans_type + ]["metric"] + CC_para.append( + f"{_metric['metricWeight']},{_metric['radius']}" + ) + if "samplingStrategy" in ants_para[para_index][para_type][ + trans_index + ][trans_type]["metric"] and ants_para[para_index][ + para_type + ][trans_index][trans_type]["metric"][ + "samplingStrategy" + ] in ["None", "Regular", "Random"]: CC_para.append( - "{0},{1}".format( - ants_para[para_index][para_type][trans_index][ - trans_type - ]["metric"]["metricWeight"], + str( ants_para[para_index][para_type][trans_index][ trans_type - ]["metric"]["radius"], + ]["metric"]["samplingStrategy"] ) ) - if "samplingStrategy" in ants_para[para_index][ - para_type - ][trans_index][trans_type]["metric"] and ants_para[ - para_index - ][para_type][trans_index][trans_type]["metric"][ - "samplingStrategy" - ] in ["None", "Regular", "Random"]: - CC_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type]["metric"]["samplingStrategy"] - ) - ) - if ( - "samplingPercentage" - in ants_para[para_index][para_type][trans_index][ - trans_type - ]["metric"] - and ants_para[para_index][para_type][trans_index][ - trans_type - ]["metric"]["samplingPercentage"] - is not None - ): - CC_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type]["metric"][ - "samplingPercentage" - ] - ) - ) - CC_para = ",".join([str(elem) for elem in CC_para]) - regcmd.append("--metric") - regcmd.append( - "CC[{0},{1},{2}]".format( - reference_skull, moving_skull, CC_para + if ( + "samplingPercentage" + in ants_para[para_index][para_type][trans_index][ + trans_type + ]["metric"] + and ants_para[para_index][para_type][trans_index][ + trans_type + ]["metric"]["samplingPercentage"] + is not None + ): + CC_para.append( + str( + ants_para[para_index][para_type][trans_index][ + trans_type + ]["metric"]["samplingPercentage"] ) ) + CC_para = ",".join([str(elem) for elem in CC_para]) + regcmd.append("--metric") + regcmd.append( + f"CC[{reference_skull},{moving_skull},{CC_para}]" + ) if ( "convergence" @@ -409,59 +396,57 @@ def hardcoded_reg( ]["convergence"]["iteration"] is None ): - err_msg = "Please specifiy convergence iteration of ANTs parameters in pipeline config." - raise Exception(err_msg) - else: + err_msg = ( + "Please specifiy convergence iteration of ANTs" + " parameters in pipeline config." 
+ ) + raise RequiredFieldInvalid(err_msg) + convergence_para.append( + str( + ants_para[para_index][para_type][trans_index][ + trans_type + ]["convergence"]["iteration"] + ) + ) + if ( + "convergenceThreshold" + in ants_para[para_index][para_type][trans_index][ + trans_type + ]["convergence"] + and ants_para[para_index][para_type][trans_index][ + trans_type + ]["convergence"]["convergenceThreshold"] + is not None + ): convergence_para.append( - "{0}".format( + str( ants_para[para_index][para_type][trans_index][ trans_type - ]["convergence"]["iteration"] + ]["convergence"]["convergenceThreshold"] ) ) - if ( - "convergenceThreshold" - in ants_para[para_index][para_type][trans_index][ - trans_type - ]["convergence"] - and ants_para[para_index][para_type][trans_index][ - trans_type - ]["convergence"]["convergenceThreshold"] - is not None - ): - convergence_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type]["convergence"][ - "convergenceThreshold" - ] - ) - ) - if ( - "convergenceWindowSize" - in ants_para[para_index][para_type][trans_index][ - trans_type - ]["convergence"] - and ants_para[para_index][para_type][trans_index][ - trans_type - ]["convergence"]["convergenceWindowSize"] - is not None - ): - convergence_para.append( - "{0}".format( - ants_para[para_index][para_type][ - trans_index - ][trans_type]["convergence"][ - "convergenceWindowSize" - ] - ) + if ( + "convergenceWindowSize" + in ants_para[para_index][para_type][trans_index][ + trans_type + ]["convergence"] + and ants_para[para_index][para_type][trans_index][ + trans_type + ]["convergence"]["convergenceWindowSize"] + is not None + ): + convergence_para.append( + str( + ants_para[para_index][para_type][trans_index][ + trans_type + ]["convergence"]["convergenceWindowSize"] ) - convergence_para = ",".join( - [str(elem) for elem in convergence_para] ) - regcmd.append("--convergence") - regcmd.append("[{0}]".format(convergence_para)) + convergence_para = ",".join( + [str(elem) for elem in convergence_para] + ) + regcmd.append("--convergence") + regcmd.append(f"[{convergence_para}]") if ( "smoothing-sigmas" @@ -473,7 +458,7 @@ def hardcoded_reg( ): regcmd.append("--smoothing-sigmas") regcmd.append( - "{0}".format( + str( ants_para[para_index][para_type][trans_index][ trans_type ]["smoothing-sigmas"] @@ -490,7 +475,7 @@ def hardcoded_reg( ): regcmd.append("--shrink-factors") regcmd.append( - "{0}".format( + str( ants_para[para_index][para_type][trans_index][ trans_type ]["shrink-factors"] @@ -520,15 +505,11 @@ def hardcoded_reg( is not None ): regcmd.append("--winsorize-image-intensities") + _quantile = ants_para[para_index][para_type][trans_index][ + trans_type + ]["winsorize-image-intensities"] regcmd.append( - "[{0},{1}]".format( - ants_para[para_index][para_type][trans_index][ - trans_type - ]["winsorize-image-intensities"]["lowerQuantile"], - ants_para[para_index][para_type][trans_index][ - trans_type - ]["winsorize-image-intensities"]["upperQuantile"], - ) + f"[{_quantile['lowerQuantile']},{_quantile['upperQuantile']}]" ) if ( @@ -543,9 +524,7 @@ def hardcoded_reg( trans_type ]["masks"]: regcmd.append("--masks") - regcmd.append( - "[{0},{1}]".format(reference_mask, moving_mask) - ) + regcmd.append(f"[{reference_mask},{moving_mask}]") else: regcmd.append("--masks") regcmd.append("[NULL,NULL]") @@ -557,30 +536,27 @@ def hardcoded_reg( regcmd.append(str(fixed_image_mask)) else: if ( - ants_para[para_index][para_type]["fixed_image_mask"] is False + not 
ants_para[para_index][para_type]["fixed_image_mask"] and ants_para[para_index][para_type]["moving_image_mask"] - is True ): err_msg = ( - "Masks option in ANTs parameters: %d is not supported. " - "Please set `fixed_image_mask` as True. " - "Or set both `fixed_image_mask` and `moving_image_mask` as False" - % ants_para[para_index][para_type] + "Masks option in ANTs parameters:" + f" {ants_para[para_index][para_type]} is not supported." + " Please set `fixed_image_mask` as True. Or set both" + " `fixed_image_mask` and `moving_image_mask` as False" ) - raise Exception(err_msg) - elif ( - ants_para[para_index][para_type]["fixed_image_mask"] is True + raise NotImplementedError(err_msg) + if ( + ants_para[para_index][para_type]["fixed_image_mask"] and ants_para[para_index][para_type]["moving_image_mask"] - is True ): regcmd.append("--masks") regcmd.append( "[" + str(reference_mask) + "," + str(moving_mask) + "]" ) elif ( - ants_para[para_index][para_type]["fixed_image_mask"] is True + ants_para[para_index][para_type]["fixed_image_mask"] and ants_para[para_index][para_type]["moving_image_mask"] - is False ): regcmd.append("--masks") regcmd.append("[" + str(reference_mask) + "]") @@ -589,7 +565,7 @@ def hardcoded_reg( if interp is not None: regcmd.append("--interpolation") - regcmd.append("{0}".format(interp)) + regcmd.append(f"{interp}") regcmd.append("--output") regcmd.append("[transform,transform_Warped.nii.gz]") @@ -602,10 +578,11 @@ def hardcoded_reg( try: subprocess.check_output(regcmd) except Exception as e: - raise Exception( + msg = ( "[!] ANTS registration did not complete successfully." - "\n\nError details:\n{0}\n{1}\n".format(e, e.output) + f"\n\nError details:\n{e}\n{e.output}\n" ) + raise RuntimeError(msg) warp_list = [] warped_image = None @@ -619,21 +596,22 @@ def hardcoded_reg( warped_image = os.getcwd() + "/" + f if not warped_image: - raise Exception( - "\n\n[!] No registration output file found. ANTS " - "registration may not have completed " - "successfully.\n\n" + msg = ( + "\n\n[!] No registration output file found. ANTS registration may not have" + " completed successfully.\n\n" ) + raise RuntimeError(msg) return warp_list, warped_image def change_itk_transform_type(input_affine_file): - """ - this function takes in the affine.txt produced by the c3d_affine_tool + """Produce an updated affine file for ANTs compatibility. + + This function takes in the affine.txt produced by the c3d_affine_tool (which converted an FSL FLIRT affine.mat into the affine.txt). 
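The long `hardcoded_reg` refactor above replaces `%`/`.format()` string building with f-strings and typed exceptions while assembling the `antsRegistration` command. A condensed, hedged sketch of that assembly and of the error handling around the final call; all parameter values and filenames are examples, and the real function reads them from the nested pipeline-config structure.

import os
import subprocess

def build_and_run_ants(reference_brain, moving_brain):
    """Assemble an example antsRegistration call from config-style dicts and run it."""
    regcmd = ["antsRegistration", "--dimensionality", "3"]

    # --winsorize-image-intensities [lower,upper]
    winsorize = {"lowerQuantile": 0.01, "upperQuantile": 0.99}  # example values
    regcmd += ["--winsorize-image-intensities",
               f"[{winsorize['lowerQuantile']},{winsorize['upperQuantile']}]"]

    # one example transform stage
    regcmd += ["--transform", "SyN[0.1]"]

    # --metric MI[fixed,moving,weight,bins,samplingStrategy,samplingPercentage]
    metric = {"metricWeight": 1, "numberOfBins": 32,
              "samplingStrategy": "Regular", "samplingPercentage": 0.25}  # examples
    mi = [f"{metric['metricWeight']},{metric['numberOfBins']}"]
    if metric.get("samplingStrategy") in ("None", "Regular", "Random"):
        mi.append(str(metric["samplingStrategy"]))
    if metric.get("samplingPercentage") is not None:
        mi.append(str(metric["samplingPercentage"]))
    regcmd += ["--metric", f"MI[{reference_brain},{moving_brain},{','.join(mi)}]"]

    # --convergence [iterations,threshold,windowSize]
    convergence = {"iteration": "100x100x70x20",
                   "convergenceThreshold": 1e-09,
                   "convergenceWindowSize": 15}  # examples
    conv = [str(convergence["iteration"])]
    for key in ("convergenceThreshold", "convergenceWindowSize"):
        if convergence.get(key) is not None:
            conv.append(str(convergence[key]))
    regcmd += ["--convergence", f"[{','.join(conv)}]"]

    regcmd += ["--smoothing-sigmas", "3.0x2.0x1.0x0.0",
               "--shrink-factors", "8x4x2x1",
               "--output", "[transform,transform_Warped.nii.gz]"]

    try:
        subprocess.check_output(regcmd)
    except subprocess.CalledProcessError as e:
        msg = ("[!] ANTS registration did not complete successfully."
               f"\n\nError details:\n{e}\n{e.output}\n")
        raise RuntimeError(msg)

    warped = [f for f in os.listdir(os.getcwd()) if "Warped" in f]
    if not warped:
        msg = ("[!] No registration output file found. ANTS registration may not"
               " have completed successfully.")
        raise RuntimeError(msg)
    return os.path.join(os.getcwd(), warped[0])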
- it then modifies the 'Transform Type' of this affine.txt so that it is + It then modifies the 'Transform Type' of this affine.txt so that it is compatible with the antsApplyTransforms tool and produces a new affine file titled 'updated_affine.txt' """ @@ -698,7 +676,7 @@ def one_d_to_mat(one_d_filename): return mat_filenames -def run_ants_apply_warp( +def run_ants_apply_warp( # noqa: PLR0913 moving_image, reference, initial=None, @@ -757,35 +735,35 @@ def run_ants_apply_warp( if nonlinear: cmd.append("-t") if inverse: - cmd.append("[{0}, {1}]".format(os.path.abspath(nonlinear), "1")) + cmd.append(f"[{os.path.abspath(nonlinear)}, 1]") else: cmd.append(os.path.abspath(nonlinear)) if affine: cmd.append("-t") if inverse: - cmd.append("[{0}, {1}]".format(os.path.abspath(affine), "1")) + cmd.append(f"[{os.path.abspath(affine)}, 1]") else: cmd.append(os.path.abspath(affine)) if rigid: cmd.append("-t") if inverse: - cmd.append("[{0}, {1}]".format(os.path.abspath(rigid), "1")) + cmd.append(f"[{os.path.abspath(rigid)}, 1]") else: cmd.append(os.path.abspath(rigid)) if initial: cmd.append("-t") if inverse: - cmd.append("[{0}, {1}]".format(os.path.abspath(initial), "1")) + cmd.append(f"[{os.path.abspath(initial)}, 1]") else: cmd.append(os.path.abspath(initial)) if func_to_anat: cmd.append("-t") if inverse: - cmd.append("[{0}, {1}]".format(os.path.abspath(func_to_anat), "1")) + cmd.append(f"[{os.path.abspath(func_to_anat)}, 1]") else: cmd.append(os.path.abspath(func_to_anat)) @@ -794,24 +772,8 @@ def run_ants_apply_warp( return out_image -def cpac_ants_apply_nonlinear_inverse_warp( - cpac_dir, moving_image, reference, dim=3, interp="Linear" -): - """Run antsApplyTransforms for inverse warping when given a C-PAC output - directory. - """ - import os - - cpac_dir = os.path.abspath(cpac_dir) - - for dir in os.listdir(cpac_dir): - if "ants_initial_xfm" in dir: - pass - - # run_ants_apply_warp() - - def run_c3d(reference_file, source_file, transform_file): + """Run c3d_affine_tool to convert an FSL FLIRT affine transform to ITK.""" import os import subprocess @@ -833,14 +795,15 @@ def run_c3d(reference_file, source_file, transform_file): return itk_transform -def run_c4d(input, output_name): +def run_c4d(input_name, output_name): + """Run c4d to split a 4D image into 3D images.""" import os output1 = os.path.join(os.getcwd(), output_name + "1.nii.gz") output2 = os.path.join(os.getcwd(), output_name + "2.nii.gz") output3 = os.path.join(os.getcwd(), output_name + "3.nii.gz") - cmd = "c4d -mcs %s -oo %s %s %s" % (input, output1, output2, output3) + cmd = f"c4d -mcs {input_name} -oo {output1} {output2} {output3}" os.system(cmd) return output1, output2, output3 diff --git a/CPAC/reho/reho.py b/CPAC/reho/reho.py index fe11205907..2dd5379ae5 100644 --- a/CPAC/reho/reho.py +++ b/CPAC/reho/reho.py @@ -94,7 +94,7 @@ def create_reho(wf_name): reho_imports = [ "import os", "import sys", - "import nibabel as nb", + "import nibabel as nib", "import numpy as np", "from CPAC.reho.utils import f_kendall", ] diff --git a/CPAC/reho/utils.py b/CPAC/reho/utils.py index 3a69c9abee..d48434af00 100644 --- a/CPAC/reho/utils.py +++ b/CPAC/reho/utils.py @@ -1,3 +1,28 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. 
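In `run_ants_apply_warp` above, each supplied transform is appended after `-t`, wrapped as `[transform, 1]` when the inverse is requested. A small sketch of that pattern; the filenames in the usage comment are placeholders.

import os

def transform_args(transforms, inverse=False):
    """Build the -t arguments for antsApplyTransforms, optionally inverted."""
    cmd = []
    for xfm in transforms:
        cmd.append("-t")
        cmd.append(f"[{os.path.abspath(xfm)}, 1]" if inverse else os.path.abspath(xfm))
    return cmd

# transform_args(["nonlinear.nii.gz", "affine.mat"], inverse=True)
# -> ['-t', '[<abs path>/nonlinear.nii.gz, 1]', '-t', '[<abs path>/affine.mat, 1]']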
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +import os +import sys + +import numpy as np +import nibabel as nib + +from CPAC.utils.monitoring import IFLOGGER + + def getOpString(mean, std_dev): """ Generate the Operand String to be used in workflow nodes to supply @@ -88,12 +113,13 @@ def compute_reho(in_file, mask_file, cluster_size): nvoxel = cluster_size - res_img = nb.load(res_fname) - res_mask_img = nb.load(res_mask_fname) + res_img = nib.load(res_fname) + res_mask_img = nib.load(res_mask_fname) res_data = res_img.get_fdata() res_mask_data = res_mask_img.get_fdata() + IFLOGGER.info(res_data.shape) (n_x, n_y, n_z, n_t) = res_data.shape # "flatten" each volume of the timeseries into one big array instead of @@ -257,7 +283,7 @@ def compute_reho(in_file, mask_file, cluster_size): K[i, j, k] = f_kendall(mask_R_block) - img = nb.Nifti1Image(K, header=res_img.header, affine=res_img.affine) + img = nib.Nifti1Image(K, header=res_img.header, affine=res_img.affine) reho_file = os.path.join(os.getcwd(), "ReHo.nii.gz") img.to_filename(reho_file) return reho_file diff --git a/CPAC/resources/cpac_outputs.tsv b/CPAC/resources/cpac_outputs.tsv index cf6a492380..03ddd5de73 100644 --- a/CPAC/resources/cpac_outputs.tsv +++ b/CPAC/resources/cpac_outputs.tsv @@ -1,220 +1,220 @@ Resource Type Space Sub-Directory File To Smooth To z-std 4D Time Series Optional: Debugging Multi-File -alff alff functional func NIfTI Yes Yes -desc-sm_alff alff functional func NIfTI Yes -desc-sm-zstd_alff alff functional func NIfTI -desc-zstd_alff alff functional func NIfTI -space-template_alff alff template func NIfTI Yes Yes -space-template_desc-sm_alff alff template func NIfTI Yes -space-template_desc-sm-zstd_alff alff template func NIfTI -space-template_desc-zstd_alff alff template func NIfTI -desc-brain_bold bold functional func NIfTI Yes Yes -desc-mean_bold bold functional func NIfTI -desc-motion_bold bold functional func NIfTI Yes Yes -desc-preproc_bold bold functional func NIfTI Yes -desc-sm_bold bold functional func NIfTI Yes Yes -sbref bold functional func NIfTI -space-EPItemplate_bold bold EPI template func NIfTI Yes -space-EPItemplate_desc-brain_bold bold EPI template func NIfTI Yes Yes -space-EPItemplate_desc-mean_bold bold EPI template func NIfTI -space-EPItemplate_desc-preproc_bold bold EPI template func NIfTI Yes -space-symtemplate_desc-sm_bold bold symmetric template func NIfTI Yes Yes -space-T1w_sbref bold T1w func NIfTI -space-template_bold bold template func NIfTI Yes -space-template_desc-brain_bold bold template func NIfTI Yes Yes -space-template_desc-head_bold bold template func NIfTI Yes -space-template_desc-mean_bold bold template func NIfTI -space-template_desc-preproc_bold bold template func NIfTI Yes -space-template_desc-scout_bold bold template func NIfTI -space-template_sbref bold template func NIfTI -space-template_desc-DualReg_correlations correlation template func NIfTI -space-template_desc-MeanSCA_correlations correlation template func NIfTI -space-template_desc-MultReg_correlations correlation template func NIfTI -space-template_desc-ndmg_correlations correlation template func NIfTI -space-template_desc-PearsonAfni_correlations correlation template func tsv 
-space-template_desc-PartialAfni_correlations correlation template func tsv -space-template_desc-PearsonNilearn_correlations correlation template func tsv -space-template_desc-PartialNilearn_correlations correlation template func tsv -space-template_dcb degree-centrality template func NIfTI Yes Yes -space-template_desc-sm_dcb degree-centrality template func NIfTI Yes -space-template_desc-sm-zstd_dcb degree-centrality template func NIfTI -space-template_desc-zstd_dcb degree-centrality template func NIfTI -space-template_dcw degree-centrality template func NIfTI Yes Yes -space-template_desc-sm_dcw degree-centrality template func NIfTI Yes -space-template_desc-sm-zstd_dcw degree-centrality template func NIfTI -space-template_desc-zstd_dcw degree-centrality template func NIfTI -space-template_ecb eigen-centrality template func NIfTI Yes Yes -space-template_desc-sm_ecb eigen-centrality template func NIfTI Yes -space-template_desc-sm-zstd_ecb eigen-centrality template func NIfTI -space-template_desc-zstd_ecb eigen-centrality template func NIfTI -space-template_ecw eigen-centrality template func NIfTI Yes Yes -space-template_desc-sm_ecw eigen-centrality template func NIfTI Yes -space-template_desc-sm-zstd_ecw eigen-centrality template func NIfTI -space-template_desc-zstd_ecw eigen-centrality template func NIfTI -desc-sm_falff falff functional func NIfTI Yes -desc-sm-zstd_falff falff functional func NIfTI -desc-zstd_falff falff functional func NIfTI -falff falff functional func NIfTI Yes Yes -space-template_desc-sm_falff falff template func NIfTI Yes -space-template_desc-sm-zstd_falff falff template func NIfTI -space-template_desc-zstd_falff falff template func NIfTI -space-template_falff falff template func NIfTI Yes Yes -space-template_lfcdb lfcd template func NIfTI Yes Yes -space-template_desc-sm_lfcdb lfcd template func NIfTI Yes -space-template_desc-sm-zstd_lfcdb lfcd template func NIfTI -space-template_desc-zstd_lfcdb lfcd template func NIfTI -space-template_lfcdw lfcd template func NIfTI Yes Yes -space-template_desc-sm_lfcdw lfcd template func NIfTI Yes -space-template_desc-sm-zstd_lfcdw lfcd template func NIfTI -space-template_desc-zstd_lfcdw lfcd template func NIfTI -space-EPItemplate_desc-bold_mask mask EPI template func NIfTI -space-EPItemplate_res-derivative_desc-bold_mask mask EPI template func NIfTI -space-bold_desc-brain_mask mask functional func NIfTI -space-bold_desc-eroded_mask mask functional func NIfTI -space-bold_label-CSF_desc-eroded_mask mask functional func NIfTI -space-bold_label-CSF_mask mask functional func NIfTI -space-bold_label-GM_desc-eroded_mask mask functional func NIfTI -space-bold_label-GM_mask mask functional func NIfTI -space-bold_label-WM_desc-eroded_mask mask functional func NIfTI -space-bold_label-WM_mask mask functional func NIfTI -space-longitudinal_desc-brain_mask mask longitudinal T1w anat NIfTI -space-longitudinal_label-CSF_desc-preproc_mask mask longitudinal T1w anat NIfTI -space-longitudinal_label-CSF_mask mask longitudinal T1w anat NIfTI -space-longitudinal_label-GM_desc-preproc_mask mask longitudinal T1w anat NIfTI -space-longitudinal_label-GM_mask mask longitudinal T1w anat NIfTI -space-longitudinal_label-WM_desc-preproc_mask mask longitudinal T1w anat NIfTI -space-longitudinal_label-WM_mask mask longitudinal T1w anat NIfTI -label-CSF_desc-eroded_mask mask T1w anat NIfTI -label-CSF_desc-preproc_mask mask T1w anat NIfTI -label-CSF_mask mask T1w anat NIfTI -label-GM_desc-eroded_mask mask T1w anat NIfTI -label-GM_desc-preproc_mask mask T1w anat NIfTI 
-label-GM_mask mask T1w anat NIfTI -label-WM_desc-eroded_mask mask T1w anat NIfTI -label-WM_desc-preproc_mask mask T1w anat NIfTI -label-WM_mask mask T1w anat NIfTI -space-T1w_desc-acpcbrain_mask mask T1w anat NIfTI -space-T1w_desc-brain_mask mask T1w anat NIfTI -space-T1w_desc-eroded_mask mask T1w anat NIfTI -space-template_desc-brain_mask mask template anat NIfTI -space-template_desc-bold_mask mask template func NIfTI -space-template_res-derivative_desc-bold_mask mask template func NIfTI -motion motion func TSV -desc-summary_motion motion func TSV -motion-filter-plot motion func png -desc-movementParameters_motion motion func TSV -desc-movementParametersUnfiltered_motion motion func TSV -label-CSF_probseg probseg T1w anat NIfTI -label-GM_probseg probseg T1w anat NIfTI -label-WM_probseg probseg T1w anat NIfTI -desc-T1wAxial_quality qc anat png -desc-T1wSagittal_quality qc anat png -desc-dsegAxial_quality qc anat png -desc-dsegSagittal_quality qc anat png -desc-boldAxial_quality qc func png -desc-boldSagittal_quality qc func png -desc-boldCarpet_quality qc func png -desc-framewiseDisplacementJenkinsonPlot_quality qc func png -desc-movementParametersTrans_quality qc func png -desc-movementParametersRot_quality qc func png -desc-boldSnrAxial_quality qc func png -desc-boldSnrSagittal_quality qc func png -desc-boldSnrHist_quality qc func png -desc-boldSnr_quality qc func png -space-template_desc-xcp_quality qc func tsv -desc-confounds_timeseries regressors func 1D -desc-sm_reho reho functional func NIfTI Yes -desc-sm-zstd_reho reho functional func NIfTI -desc-zstd_reho reho functional func NIfTI -reho reho functional func NIfTI Yes Yes -space-template_desc-sm_reho reho template func NIfTI Yes -space-template_desc-sm-zstd_reho reho template func NIfTI -space-template_desc-zstd_reho reho template func NIfTI -space-template_reho reho template func NIfTI Yes Yes -desc-DualReg_statmap statistic template func NIfTI -desc-MultReg_statmap statistic template func NIfTI +alff alff functional func NIfTI Yes Yes +desc-sm_alff alff functional func NIfTI Yes +desc-sm-zstd_alff alff functional func NIfTI +desc-zstd_alff alff functional func NIfTI +space-template_alff alff template func NIfTI Yes Yes +space-template_desc-sm_alff alff template func NIfTI Yes +space-template_desc-sm-zstd_alff alff template func NIfTI +space-template_desc-zstd_alff alff template func NIfTI +desc-brain_bold bold functional func NIfTI Yes Yes +desc-mean_bold bold functional func NIfTI +desc-motion_bold bold functional func NIfTI Yes Yes +desc-preproc_bold bold functional func NIfTI Yes +desc-sm_bold bold functional func NIfTI Yes Yes +sbref bold functional func NIfTI +space-EPItemplate_bold bold EPI template func NIfTI Yes +space-EPItemplate_desc-brain_bold bold EPI template func NIfTI Yes Yes +space-EPItemplate_desc-mean_bold bold EPI template func NIfTI +space-EPItemplate_desc-preproc_bold bold EPI template func NIfTI Yes +space-symtemplate_desc-sm_bold bold symmetric template func NIfTI Yes Yes +space-T1w_sbref bold T1w func NIfTI +space-template_bold bold template func NIfTI Yes +space-template_desc-brain_bold bold template func NIfTI Yes Yes +space-template_desc-head_bold bold template func NIfTI Yes +space-template_desc-mean_bold bold template func NIfTI +space-template_desc-preproc_bold bold template func NIfTI Yes +space-template_desc-scout_bold bold template func NIfTI +space-template_sbref bold template func NIfTI +space-template_desc-DualReg_correlations correlation template func NIfTI 
+space-template_desc-MeanSCA_correlations correlation template func NIfTI +space-template_desc-MultReg_correlations correlation template func NIfTI +space-template_desc-ndmg_correlations correlation template func NIfTI +space-template_desc-PearsonAfni_correlations correlation template func tsv +space-template_desc-PartialAfni_correlations correlation template func tsv +space-template_desc-PearsonNilearn_correlations correlation template func tsv +space-template_desc-PartialNilearn_correlations correlation template func tsv +space-template_dcb degree-centrality template func NIfTI Yes Yes +space-template_desc-sm_dcb degree-centrality template func NIfTI Yes +space-template_desc-sm-zstd_dcb degree-centrality template func NIfTI +space-template_desc-zstd_dcb degree-centrality template func NIfTI +space-template_dcw degree-centrality template func NIfTI Yes Yes +space-template_desc-sm_dcw degree-centrality template func NIfTI Yes +space-template_desc-sm-zstd_dcw degree-centrality template func NIfTI +space-template_desc-zstd_dcw degree-centrality template func NIfTI +space-template_ecb eigen-centrality template func NIfTI Yes Yes +space-template_desc-sm_ecb eigen-centrality template func NIfTI Yes +space-template_desc-sm-zstd_ecb eigen-centrality template func NIfTI +space-template_desc-zstd_ecb eigen-centrality template func NIfTI +space-template_ecw eigen-centrality template func NIfTI Yes Yes +space-template_desc-sm_ecw eigen-centrality template func NIfTI Yes +space-template_desc-sm-zstd_ecw eigen-centrality template func NIfTI +space-template_desc-zstd_ecw eigen-centrality template func NIfTI +desc-sm_falff falff functional func NIfTI Yes +desc-sm-zstd_falff falff functional func NIfTI +desc-zstd_falff falff functional func NIfTI +falff falff functional func NIfTI Yes Yes +space-template_desc-sm_falff falff template func NIfTI Yes +space-template_desc-sm-zstd_falff falff template func NIfTI +space-template_desc-zstd_falff falff template func NIfTI +space-template_falff falff template func NIfTI Yes Yes +space-template_lfcdb lfcd template func NIfTI Yes Yes +space-template_desc-sm_lfcdb lfcd template func NIfTI Yes +space-template_desc-sm-zstd_lfcdb lfcd template func NIfTI +space-template_desc-zstd_lfcdb lfcd template func NIfTI +space-template_lfcdw lfcd template func NIfTI Yes Yes +space-template_desc-sm_lfcdw lfcd template func NIfTI Yes +space-template_desc-sm-zstd_lfcdw lfcd template func NIfTI +space-template_desc-zstd_lfcdw lfcd template func NIfTI +space-EPItemplate_desc-bold_mask mask EPI template func NIfTI +space-EPItemplate_res-derivative_desc-bold_mask mask EPI template func NIfTI +space-bold_desc-brain_mask mask functional func NIfTI +space-bold_desc-eroded_mask mask functional func NIfTI +space-bold_label-CSF_desc-eroded_mask mask functional func NIfTI +space-bold_label-CSF_mask mask functional func NIfTI +space-bold_label-GM_desc-eroded_mask mask functional func NIfTI +space-bold_label-GM_mask mask functional func NIfTI +space-bold_label-WM_desc-eroded_mask mask functional func NIfTI +space-bold_label-WM_mask mask functional func NIfTI +space-longitudinal_desc-brain_mask mask longitudinal T1w anat NIfTI +space-longitudinal_label-CSF_desc-preproc_mask mask longitudinal T1w anat NIfTI +space-longitudinal_label-CSF_mask mask longitudinal T1w anat NIfTI +space-longitudinal_label-GM_desc-preproc_mask mask longitudinal T1w anat NIfTI +space-longitudinal_label-GM_mask mask longitudinal T1w anat NIfTI +space-longitudinal_label-WM_desc-preproc_mask mask longitudinal T1w anat NIfTI 
+space-longitudinal_label-WM_mask mask longitudinal T1w anat NIfTI +label-CSF_desc-eroded_mask mask T1w anat NIfTI +label-CSF_desc-preproc_mask mask T1w anat NIfTI +label-CSF_mask mask T1w anat NIfTI +label-GM_desc-eroded_mask mask T1w anat NIfTI +label-GM_desc-preproc_mask mask T1w anat NIfTI +label-GM_mask mask T1w anat NIfTI +label-WM_desc-eroded_mask mask T1w anat NIfTI +label-WM_desc-preproc_mask mask T1w anat NIfTI +label-WM_mask mask T1w anat NIfTI +space-T1w_desc-acpcbrain_mask mask T1w anat NIfTI +space-T1w_desc-brain_mask mask T1w anat NIfTI +space-T1w_desc-eroded_mask mask T1w anat NIfTI +space-template_desc-brain_mask mask template anat NIfTI +space-template_desc-bold_mask mask template func NIfTI +space-template_res-derivative_desc-bold_mask mask template func NIfTI +motion motion func TSV +desc-summary_motion motion func TSV +motion-filter-plot motion func png +desc-movementParameters_motion motion func TSV +desc-movementParametersUnfiltered_motion motion func TSV +label-CSF_probseg probseg T1w anat NIfTI +label-GM_probseg probseg T1w anat NIfTI +label-WM_probseg probseg T1w anat NIfTI +desc-T1wAxial_quality qc anat png +desc-T1wSagittal_quality qc anat png +desc-dsegAxial_quality qc anat png +desc-dsegSagittal_quality qc anat png +desc-boldAxial_quality qc func png +desc-boldSagittal_quality qc func png +desc-boldCarpet_quality qc func png +desc-framewiseDisplacementJenkinsonPlot_quality qc func png +desc-movementParametersTrans_quality qc func png +desc-movementParametersRot_quality qc func png +desc-boldSnrAxial_quality qc func png +desc-boldSnrSagittal_quality qc func png +desc-boldSnrHist_quality qc func png +desc-boldSnr_quality qc func png +space-template_desc-xcp_quality qc func tsv +desc-confounds_timeseries regressors func 1D +desc-sm_reho reho functional func NIfTI Yes +desc-sm-zstd_reho reho functional func NIfTI +desc-zstd_reho reho functional func NIfTI +reho reho functional func NIfTI Yes Yes +space-template_desc-sm_reho reho template func NIfTI Yes +space-template_desc-sm-zstd_reho reho template func NIfTI +space-template_desc-zstd_reho reho template func NIfTI +space-template_reho reho template func NIfTI Yes Yes +desc-DualReg_statmap statistic template func NIfTI +desc-MultReg_statmap statistic template func NIfTI hemi-L_desc-surfaceMap_thickness surface-derived anat Yes hemi-R_desc-surfaceMap_thickness surface-derived anat Yes hemi-L_desc-surfaceMap_volume surface-derived anat Yes hemi-R_desc-surfaceMap_volume surface-derived anat Yes -hemi-L_desc-surfaceMesh_pial surface-derived anat -hemi-R_desc-surfaceMesh_pial surface-derived anat -raw-average surface-derived anat -hemi-L_desc-surfaceMesh_smoothwm surface-derived anat -hemi-R_desc-surfaceMesh_smoothwm surface-derived anat -atlas-DesikanKilliany_space-fsLR_den-32k_dlabel surface-derived anat -atlas-Destrieux_space-fsLR_den-32k_dlabel surface-derived anat -atlas-DesikanKilliany_space-fsLR_den-164k_dlabel surface-derived anat -atlas-Destrieux_space-fsLR_den-164k_dlabel surface-derived anat -space-fsLR_den-32k_bold-dtseries surface-derived func +hemi-L_desc-surfaceMesh_pial surface-derived anat +hemi-R_desc-surfaceMesh_pial surface-derived anat +raw-average surface-derived anat +hemi-L_desc-surfaceMesh_smoothwm surface-derived anat +hemi-R_desc-surfaceMesh_smoothwm surface-derived anat +atlas-DesikanKilliany_space-fsLR_den-32k_dlabel surface-derived anat +atlas-Destrieux_space-fsLR_den-32k_dlabel surface-derived anat +atlas-DesikanKilliany_space-fsLR_den-164k_dlabel surface-derived anat 
+atlas-Destrieux_space-fsLR_den-164k_dlabel surface-derived anat +space-fsLR_den-32k_bold-dtseries surface-derived func hemi-L_desc-surfaceMesh_sphere surface-derived anat Yes hemi-R_desc-surfaceMesh_sphere surface-derived anat Yes hemi-L_desc-surfaceMap_sulc surface-derived anat Yes hemi-R_desc-surfaceMap_sulc surface-derived anat Yes -hemi-L_desc-surface_curv surface-derived anat -hemi-R_desc-surface_curv surface-derived anat +hemi-L_desc-surface_curv surface-derived anat +hemi-R_desc-surface_curv surface-derived anat hemi-L_desc-surfaceMesh_white surface-derived anat Yes hemi-R_desc-surfaceMesh_white surface-derived anat Yes wmparc surface-derived anat Yes -space-symtemplate_desc-brain_T1w T1w symmetric template anat NIfTI Yes -desc-brain_T1w T1w T1w anat NIfTI Yes -desc-head_T1w T1w T1w anat NIfTI -desc-preproc_T1w T1w T1w anat NIfTI -desc-reorient_T1w T1w T1w anat NIfTI Yes -desc-restore_T1w T1w T1w anat NIfTI -desc-restore-brain_T1w T1w T1w anat NIfTI -space-template_desc-brain_T1w T1w template anat NIfTI Yes -space-template_desc-preproc_T1w T1w template anat NIfTI -space-template_desc-head_T1w T1w template anat NIfTI -space-template_desc-T1w_mask mask template anat NIfTI -space-template_desc-Mean_timeseries timeseries func 1D -desc-MeanSCA_timeseries timeseries func 1D -desc-SpatReg_timeseries timeseries func 1D -desc-Voxel_timeseries timeseries func 1D -space-longitudinal_label-CSF_probseg tissue probability longitudinal T1w anat NIfTI -space-longitudinal_label-GM_probseg tissue probability longitudinal T1w anat NIfTI -space-longitudinal_label-WM_probseg tissue probability longitudinal T1w anat NIfTI -vmhc vmhc symmetric template func NIfTI -blip-warp xfm func NIfTI -from-bold_to-EPItemplate_mode-image_desc-linear_xfm xfm func NIfTI -from-bold_to-EPItemplate_mode-image_desc-nonlinear_xfm xfm func NIfTI -from-bold_to-EPItemplate_mode-image_xfm xfm func NIfTI -from-bold_to-symtemplate_mode-image_xfm xfm func NIfTI -from-bold_to-T1w_mode-image_desc-linear_xfm xfm func NIfTI -from-bold_to-template_mode-image_xfm xfm func NIfTI -from-EPItemplate_to-bold_mode-image_desc-linear_xfm xfm func NIfTI -from-EPItemplate_to-bold_mode-image_desc-nonlinear_xfm xfm func NIfTI -from-longitudinal_to-symtemplate_mode-image_desc-linear_xfm xfm anat NIfTI -from-longitudinal_to-symtemplate_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-longitudinal_to-symtemplate_mode-image_xfm xfm anat NIfTI -from-longitudinal_to-template_mode-image_desc-linear_xfm xfm anat NIfTI -from-longitudinal_to-template_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-longitudinal_to-template_mode-image_xfm xfm anat NIfTI -from-symtemplate_to-bold_mode-image_xfm xfm func NIfTI -from-symtemplate_to-longitudinal_mode-image_desc-linear_xfm xfm anat NIfTI -from-symtemplate_to-longitudinal_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-symtemplate_to-longitudinal_mode-image_xfm xfm anat NIfTI -from-symtemplate_to-T1w_mode-image_desc-linear_xfm xfm anat NIfTI -from-symtemplate_to-T1w_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-symtemplate_to-T1w_mode-image_xfm xfm anat NIfTI -from-T1w_to-symtemplate_mode-image_desc-linear_xfm xfm anat NIfTI -from-T1w_to-symtemplate_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-T1w_to-symtemplate_mode-image_xfm xfm anat NIfTI -from-T1w_to-template_mode-image_desc-linear_xfm xfm anat NIfTI -from-T1w_to-template_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-T1w_to-template_mode-image_xfm xfm anat NIfTI -from-template_to-bold_mode-image_xfm xfm func NIfTI 
-from-template_to-longitudinal_mode-image_desc-linear_xfm xfm anat NIfTI -from-template_to-longitudinal_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-template_to-longitudinal_mode-image_xfm xfm anat NIfTI -from-template_to-T1w_mode-image_desc-linear_xfm xfm anat NIfTI -from-template_to-T1w_mode-image_desc-nonlinear_xfm xfm anat NIfTI -from-template_to-T1w_mode-image_xfm xfm anat NIfTI -space-template_label-CSF_mask mask template anat NIfTI -space-template_label-WM_mask mask template anat NIfTI -space-template_label-GM_mask mask template anat NIfTI -space-EPItemplate_label-CSF_mask mask template func NIfTI -space-EPItemplate_label-WM_mask mask template func NIfTI -space-EPItemplate_label-GM_mask mask template func NIfTI -mdmr group functional group_analysis NIfTI -desc-zstd-mdmr group functional group_analysis NIfTI Yes +space-symtemplate_desc-brain_T1w T1w symmetric template anat NIfTI Yes +desc-brain_T1w T1w T1w anat NIfTI Yes +desc-head_T1w T1w T1w anat NIfTI +desc-preproc_T1w T1w T1w anat NIfTI +desc-reorient_T1w T1w T1w anat NIfTI Yes +desc-restore_T1w T1w T1w anat NIfTI +desc-restore-brain_T1w T1w T1w anat NIfTI +space-template_desc-brain_T1w T1w template anat NIfTI Yes +space-template_desc-preproc_T1w T1w template anat NIfTI +space-template_desc-head_T1w T1w template anat NIfTI +space-template_desc-T1w_mask mask template anat NIfTI +space-template_desc-Mean_timeseries timeseries func 1D +desc-MeanSCA_timeseries timeseries func 1D +desc-SpatReg_timeseries timeseries func 1D +desc-Voxel_timeseries timeseries func 1D +space-longitudinal_label-CSF_probseg tissue probability longitudinal T1w anat NIfTI +space-longitudinal_label-GM_probseg tissue probability longitudinal T1w anat NIfTI +space-longitudinal_label-WM_probseg tissue probability longitudinal T1w anat NIfTI +vmhc vmhc symmetric template func NIfTI +blip-warp xfm func NIfTI +from-bold_to-EPItemplate_mode-image_desc-linear_xfm xfm func NIfTI +from-bold_to-EPItemplate_mode-image_desc-nonlinear_xfm xfm func NIfTI +from-bold_to-EPItemplate_mode-image_xfm xfm func NIfTI +from-bold_to-symtemplate_mode-image_xfm xfm func NIfTI +from-bold_to-T1w_mode-image_desc-linear_xfm xfm func NIfTI +from-bold_to-template_mode-image_xfm xfm func NIfTI +from-EPItemplate_to-bold_mode-image_desc-linear_xfm xfm func NIfTI +from-EPItemplate_to-bold_mode-image_desc-nonlinear_xfm xfm func NIfTI +from-longitudinal_to-symtemplate_mode-image_desc-linear_xfm xfm anat NIfTI +from-longitudinal_to-symtemplate_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-longitudinal_to-symtemplate_mode-image_xfm xfm anat NIfTI +from-longitudinal_to-template_mode-image_desc-linear_xfm xfm anat NIfTI +from-longitudinal_to-template_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-longitudinal_to-template_mode-image_xfm xfm anat NIfTI +from-symtemplate_to-bold_mode-image_xfm xfm func NIfTI +from-symtemplate_to-longitudinal_mode-image_desc-linear_xfm xfm anat NIfTI +from-symtemplate_to-longitudinal_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-symtemplate_to-longitudinal_mode-image_xfm xfm anat NIfTI +from-symtemplate_to-T1w_mode-image_desc-linear_xfm xfm anat NIfTI +from-symtemplate_to-T1w_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-symtemplate_to-T1w_mode-image_xfm xfm anat NIfTI +from-T1w_to-symtemplate_mode-image_desc-linear_xfm xfm anat NIfTI +from-T1w_to-symtemplate_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-T1w_to-symtemplate_mode-image_xfm xfm anat NIfTI +from-T1w_to-template_mode-image_desc-linear_xfm xfm anat NIfTI 
+from-T1w_to-template_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-T1w_to-template_mode-image_xfm xfm anat NIfTI +from-template_to-bold_mode-image_xfm xfm func NIfTI +from-template_to-longitudinal_mode-image_desc-linear_xfm xfm anat NIfTI +from-template_to-longitudinal_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-template_to-longitudinal_mode-image_xfm xfm anat NIfTI +from-template_to-T1w_mode-image_desc-linear_xfm xfm anat NIfTI +from-template_to-T1w_mode-image_desc-nonlinear_xfm xfm anat NIfTI +from-template_to-T1w_mode-image_xfm xfm anat NIfTI +space-template_label-CSF_mask mask template anat NIfTI +space-template_label-WM_mask mask template anat NIfTI +space-template_label-GM_mask mask template anat NIfTI +space-EPItemplate_label-CSF_mask mask template func NIfTI +space-EPItemplate_label-WM_mask mask template func NIfTI +space-EPItemplate_label-GM_mask mask template func NIfTI +mdmr group functional group_analysis NIfTI +desc-zstd-mdmr group functional group_analysis NIfTI Yes dseg anat diff --git a/CPAC/resources/templates/BIDS_identifiers.tsv b/CPAC/resources/templates/BIDS_identifiers.tsv index ed96cb3942..b43c6a1c9f 100644 --- a/CPAC/resources/templates/BIDS_identifiers.tsv +++ b/CPAC/resources/templates/BIDS_identifiers.tsv @@ -1,26 +1,26 @@ -/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_T1w_reference.nii.gz MNI152NLin2009cAsym -/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_desc-brain_T1w.nii.gz MNI152NLin2009cAsym -/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_desc-brain_mask.nii.gz MNI152NLin2009cAsym -/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_desc-fMRIPrep_boldref.nii.gz MNI152NLin2009cAsym -/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_label-brain_probseg.nii.gz MNI152NLin2009cAsym -/code/CPAC/resources/templates/mni_icbm152_t1_tal_nlin_asym_09c.nii MNI152NLin2009cAsym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*.nii.gz MNI152NLin6ASym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain.nii.gz MNI152NLin6ASym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask.nii.gz MNI152NLin6ASym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask_dil.nii.gz MNI152NLin6ASym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_symmetric.nii.gz MNI152NLin6Sym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_symmetric.nii.gz MNI152NLin6Sym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask_symmetric.nii.gz MNI152NLin6Sym -$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask_symmetric_dil.nii.gz MNI152NLin6Sym -/ndmg_atlases/label/Human/AAL_space-MNI152NLin6_res-2x2x2.nii.gz AAL -/ndmg_atlases/label/Human/Brodmann_space-MNI152NLin6_res-2x2x2.nii.gz Brodmann 
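The BIDS_identifiers.tsv rows above pair path regexes with template identifiers, using a resolution fragment that tolerates forms like "2mm" or "3.438x3.438x3.4". As a minimal sketch of how such a pattern behaves, the snippet below reuses that fragment against two made-up FSL-style filenames; the guess_identifier helper, the sample paths, and the basename-only matching are illustrative assumptions, not part of this change.

import re
from typing import Optional

# Resolution fragment used by the identifier rows above: an optional "res-" prefix,
# digits with an optional decimal part and unit suffix, optionally repeated as an
# "x"-separated tuple (e.g. "2mm" or "3.438x3.438x3.4").
RES = r"(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*"

# Hypothetical pattern/identifier pair in the style of BIDS_identifiers.tsv.
PATTERN = rf"MNI152_T1_{RES}_brain.nii.gz"

def guess_identifier(path: str) -> Optional[str]:
    """Return the identifier if the basename matches the pattern (illustrative only)."""
    name = path.rsplit("/", 1)[-1]
    return "MNI152NLin6ASym" if re.fullmatch(PATTERN, name) else None

print(guess_identifier("/usr/share/fsl/data/standard/MNI152_T1_2mm_brain.nii.gz"))            # MNI152NLin6ASym
print(guess_identifier("/usr/share/fsl/data/standard/MNI152_T1_3.438x3.438x3.4_brain.nii.gz"))  # MNI152NLin6ASym
print(guess_identifier("sub-01_T1w.nii.gz"))                                                  # None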
+/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_T1w_reference.nii.gz MNI152NLin2009cAsym +/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_desc-brain_T1w.nii.gz MNI152NLin2009cAsym +/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_desc-brain_mask.nii.gz MNI152NLin2009cAsym +/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_desc-fMRIPrep_boldref.nii.gz MNI152NLin2009cAsym +/code/CPAC/resources/templates/tpl-MNI152NLin2009cAsym_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_label-brain_probseg.nii.gz MNI152NLin2009cAsym +/code/CPAC/resources/templates/mni_icbm152_t1_tal_nlin_asym_09c.nii MNI152NLin2009cAsym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*.nii.gz MNI152NLin6ASym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain.nii.gz MNI152NLin6ASym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask.nii.gz MNI152NLin6ASym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask_dil.nii.gz MNI152NLin6ASym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_symmetric.nii.gz MNI152NLin6Sym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_symmetric.nii.gz MNI152NLin6Sym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask_symmetric.nii.gz MNI152NLin6Sym +$FSLDIR/data/standard/MNI152_T1_(res-){0,1}[0-9]+(\.[0-9]*){0,1}[a-z]*(x[0-9]+(\.[0-9]*){0,1}[a-z]*)*_brain_mask_symmetric_dil.nii.gz MNI152NLin6Sym +/ndmg_atlases/label/Human/AAL_space-MNI152NLin6_res-2x2x2.nii.gz AAL +/ndmg_atlases/label/Human/Brodmann_space-MNI152NLin6_res-2x2x2.nii.gz Brodmann /cpac_templates/CC200.nii.gz CC 200 /cpac_templates/CC400.nii.gz CC 400 -/ndmg_atlases/label/Human/Glasser_space-MNI152NLin6_res-2x2x2.nii.gz Glasser -/ndmg_atlases/label/Human/Slab907_space-MNI152NLin6_res-2x2x2.nii.gz Slab +/ndmg_atlases/label/Human/Glasser_space-MNI152NLin6_res-2x2x2.nii.gz Glasser +/ndmg_atlases/label/Human/Slab907_space-MNI152NLin6_res-2x2x2.nii.gz Slab /ndmg_atlases/label/Human/HarvardOxfordcort-maxprob-thr25_space-MNI152NLin6_res-2x2x2.nii.gz HOCPA th25 /ndmg_atlases/label/Human/HarvardOxfordsub-maxprob-thr25_space-MNI152NLin6_res-2x2x2.nii.gz HOSPA th25 -/ndmg_atlases/label/Human/Juelich_space-MNI152NLin6_res-2x2x2.nii.gz Juelich +/ndmg_atlases/label/Human/Juelich_space-MNI152NLin6_res-2x2x2.nii.gz Juelich /ndmg_atlases/label/Human/Schaefer[^_-]*200.*.nii(\.gz){0,1} Schaefer2018 p200n17 /ndmg_atlases/label/Human/Schaefer[^_-]*300.*.nii(\.gz){0,1} Schaefer2018 p300n17 /ndmg_atlases/label/Human/Schaefer[^_-]*400.*.nii(\.gz){0,1} Schaefer2018 p400n17 diff --git a/CPAC/resources/templates/ndmg_atlases.csv b/CPAC/resources/templates/ndmg_atlases.csv index 87ba249cad..15ac6b0ba6 100644 --- a/CPAC/resources/templates/ndmg_atlases.csv +++ b/CPAC/resources/templates/ndmg_atlases.csv @@ -19,4 +19,4 @@ "yeo-7_space-MNI152NLin6_res-1x1x1.nii.gz","Yeo-7_space-MNI152NLin6_res-1x1x1.nii.gz" 
"yeo-7-liberal_space-MNI152NLin6_res-1x1x1.nii.gz","Yeo-7-liberal_space-MNI152NLin6_res-1x1x1.nii.gz" "yeo-17_space-MNI152NLin6_res-1x1x1.nii.gz","Yeo-17_space-MNI152NLin6_res-1x1x1.nii.gz" -"yeo-17-liberal_space-MNI152NLin6_res-1x1x1.nii.gz","Yeo-17-liberal_space-MNI152NLin6_res-1x1x1.nii.gz" +"yeo-17-liberal_space-MNI152NLin6_res-1x1x1.nii.gz","Yeo-17-liberal_space-MNI152NLin6_res-1x1x1.nii.gz" \ No newline at end of file diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py index 8b8d316d1d..8708da1425 100644 --- a/CPAC/resources/tests/test_templates.py +++ b/CPAC/resources/tests/test_templates.py @@ -20,7 +20,7 @@ import pytest from CPAC.pipeline import ALL_PIPELINE_CONFIGS -from CPAC.pipeline.engine import ResourcePool, ingress_pipeconfig_paths +from CPAC.pipeline.engine import ingress_pipeconfig_paths, ResourcePool from CPAC.utils.configuration import Preconfiguration from CPAC.utils.datasource import get_highest_local_res diff --git a/CPAC/sca/sca.py b/CPAC/sca/sca.py index 01e35b17c3..4e2f9fc7a9 100644 --- a/CPAC/sca/sca.py +++ b/CPAC/sca/sca.py @@ -25,8 +25,6 @@ get_spatial_map_timeseries, resample_function, ) - -# from CPAC.utils.utils import extract_one_d from CPAC.utils.datasource import ( create_roi_mask_dataflow, create_spatial_map_dataflow, @@ -145,33 +143,8 @@ def create_sca(name_sca="sca"): concat.inputs.outputtype = "NIFTI_GZ" - # also write out volumes as individual files - # split = pe.Node(interface=fsl.Split(), name='split_raw_volumes_sca') - # split.inputs.dimension = 't' - # split.inputs.out_base_name = 'sca_' - - # get_roi_num_list = pe.Node(util.Function(input_names=['timeseries_file', - # 'prefix'], - # output_names=['roi_list'], - # function=get_roi_num_list), - # name='get_roi_num_list') - # get_roi_num_list.inputs.prefix = "sca" - - # sca.connect(inputNode, 'timeseries_one_d', get_roi_num_list, - # 'timeseries_file') - - # rename_rois = pe.MapNode(interface=util.Rename(), name='output_rois', - # iterfield=['in_file', 'format_string']) - # rename_rois.inputs.keep_ext = True - - # sca.connect(split, 'out_files', rename_rois, 'in_file') - # sca.connect(get_roi_num_list, 'roi_list', rename_rois, 'format_string') - sca.connect(corr, "out_file", concat, "in_files") - # sca.connect(concat, 'out_file', split, 'in_file') sca.connect(concat, "out_file", outputNode, "correlation_stack") - # sca.connect(rename_rois, 'out_file', outputNode, - # 'correlation_files') return sca @@ -483,15 +456,12 @@ def SCA_AVG(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect( roi_timeseries_for_sca, "outputspec.roi_csv", - # ('outputspec.roi_outputs', extract_one_d), sca_roi, "inputspec.timeseries_one_d", ) outputs = { "desc-MeanSCA_timeseries": (roi_timeseries_for_sca, "outputspec.roi_csv"), - # ('outputspec.roi_outputs', - # extract_one_d)), "space-template_desc-MeanSCA_correlations": ( sca_roi, "outputspec.correlation_stack", @@ -680,7 +650,6 @@ def multiple_regression(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect( roi_timeseries_for_multreg, "outputspec.roi_csv", - # ('outputspec.roi_outputs', extract_one_d), sc_temp_reg, "inputspec.subject_timeseries", ) diff --git a/CPAC/sca/utils.py b/CPAC/sca/utils.py index d09601ad7d..e0ddf01308 100644 --- a/CPAC/sca/utils.py +++ b/CPAC/sca/utils.py @@ -1,5 +1,23 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. 
+ +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os +from CPAC.utils.monitoring import IFLOGGER + def compute_fisher_z_score(correlation_file, timeseries_one_d): """ @@ -97,14 +115,11 @@ def check_ts(in_file): np.savetxt(out_file, csv_array, delimiter="\t") if rois > timepoints: message = ( - "\n\n\n****The number of timepoints (" - + str(timepoints) - + ") is smaller than the number of ROIs to run (" - + str(rois) - + ") - therefore the GLM is" - + " underspecified and can't run.****\n\n\n" + f"\n\n\n****The number of timepoints ({timepoints}) is smaller than the" + f" number of ROIs to run ({rois}) - therefore the GLM is underspecified" + " and can't run.****\n\n\n" ) - raise Exception(message) + raise ValueError(message) else: return out_file @@ -137,15 +152,12 @@ def map_to_roi(timeseries, maps): (which == 'RT') """ import pandas as pd - from nipype import logging - - logger = logging.getLogger("nipype.workflow") testMat = pd.read_csv(timeseries) timepoints, rois = testMat.shape if rois > timepoints: - logger.warning( + IFLOGGER.warning( "The number of timepoints is smaller than the number " "of ROIs to run - therefore the GLM is " "underspecified and can't run." @@ -159,7 +171,7 @@ def map_to_roi(timeseries, maps): roi_err = ( "\n\n[!] The output of 3dROIstats, used in extracting " "the timeseries, was not in the expected format.\n\nROI " - "output file: {0}\n\n".format(timeseries) + f"output file: {timeseries}\n\n" ) for line in roi_file_lines: diff --git a/CPAC/scrubbing/scrubbing.py b/CPAC/scrubbing/scrubbing.py index b000438902..0520eb3e4b 100644 --- a/CPAC/scrubbing/scrubbing.py +++ b/CPAC/scrubbing/scrubbing.py @@ -188,7 +188,8 @@ def get_mov_parameters(infile_a, infile_b): l1 = l1.rstrip(",").split(",") warnings.warn("number of timepoints remaining after scrubbing -> %d" % len(l1)) else: - raise Exception("No time points remaining after scrubbing.") + msg = "No time points remaining after scrubbing." + raise Exception(msg) f = open(out_file, "a") for l in l1: @@ -222,7 +223,8 @@ def get_indx(scrub_input, frames_in_1D_file): if line: indx = map(int, line.split(",")) else: - raise Exception("No time points remaining after scrubbing.") + msg = "No time points remaining after scrubbing." 
+ raise Exception(msg) return scrub_input + str(indx).replace(" ", "") diff --git a/CPAC/seg_preproc/seg_preproc.py b/CPAC/seg_preproc/seg_preproc.py index 0302a4c86f..86dc7feca0 100644 --- a/CPAC/seg_preproc/seg_preproc.py +++ b/CPAC/seg_preproc/seg_preproc.py @@ -259,7 +259,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): if use_ants: collect_linear_transforms = pe.Node( - util.Merge(3), name="{0}_collect_linear_transforms".format(wf_name) + util.Merge(3), name=f"{wf_name}_collect_linear_transforms" ) preproc.connect( @@ -279,7 +279,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): output_names=["checked_transform_list", "list_length"], function=check_transforms, ), - name="{0}_check_transforms".format(wf_name), + name=f"{wf_name}_check_transforms", ) preproc.connect( @@ -294,7 +294,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, ), - name="{0}_inverse_transform_flags".format(wf_name), + name=f"{wf_name}_inverse_transform_flags", ) preproc.connect( @@ -306,7 +306,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): # mni to t1 tissueprior_mni_to_t1 = pe.Node( - interface=ants.ApplyTransforms(), name="{0}_mni_to_t1".format(wf_name) + interface=ants.ApplyTransforms(), name=f"{wf_name}_mni_to_t1" ) tissueprior_mni_to_t1.inputs.interpolation = "NearestNeighbor" @@ -334,7 +334,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): else: tissueprior_mni_to_t1 = pe.Node( - interface=fsl.FLIRT(), name="{0}_mni_to_t1".format(wf_name) + interface=fsl.FLIRT(), name=f"{wf_name}_mni_to_t1" ) tissueprior_mni_to_t1.inputs.apply_xfm = True tissueprior_mni_to_t1.inputs.interp = "nearestneighbour" @@ -402,7 +402,7 @@ def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_base "anatomical_brain", "anatomical_brain_mask", "template_brain_list", - "template_segmentation" "_list", + "template_segmentation_list", "csf_label", "gm_label", "wm_label", @@ -427,7 +427,7 @@ def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_base output_names=["multiatlas_Intensity", "multiatlas_Labels"], function=hardcoded_antsJointLabelFusion, ), - name="{0}_antsJointLabel".format(wf_name), + name=f"{wf_name}_antsJointLabel", ) preproc.connect( @@ -453,7 +453,7 @@ def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_base ) pick_tissue = pe.Node( - pick_tissue_from_labels_file_interface(), name="{0}_tissue_mask".format(wf_name) + pick_tissue_from_labels_file_interface(), name=f"{wf_name}_tissue_mask" ) preproc.connect( diff --git a/CPAC/seg_preproc/utils.py b/CPAC/seg_preproc/utils.py index 578a088b4a..eaebb42e4b 100644 --- a/CPAC/seg_preproc/utils.py +++ b/CPAC/seg_preproc/utils.py @@ -20,15 +20,13 @@ def check_if_file_is_empty(in_file): return same file """ import numpy as np - import nibabel as nb + import nibabel as nib - nii = nb.load(in_file) + nii = nib.load(in_file) data = nii.get_fdata() if data.size == 0 or np.all(data == 0) or np.all(data == np.nan): - raise ValueError( - "File {0} is empty. Use a lower threshold or turn " - "off regressors.".format(in_file) - ) + msg = f"File {in_file} is empty. Use a lower threshold or turn off regressors." + raise ValueError(msg) return in_file @@ -364,11 +362,12 @@ def hardcoded_antsJointLabelFusion( # pylint: disable=unused-variable except Exception as e: # pylint: disable=broad-except,invalid-name # pylint: disable=raise-missing-from - raise Exception( + msg = ( "[!] 
antsJointLabel segmentation method did not " "complete successfully.\n\nError " "details:\n{0}\n{1}\n".format(e, getattr(e, "output", "")) ) + raise Exception(msg) multiatlas_Intensity = None multiatlas_Labels = None @@ -382,11 +381,12 @@ def hardcoded_antsJointLabelFusion( multiatlas_Labels = os.getcwd() + "/" + f if not multiatlas_Labels: - raise Exception( + msg = ( "\n\n[!] No multiatlas labels file found. " "antsJointLabelFusion may not have completed " "successfully.\n\n" ) + raise Exception(msg) return multiatlas_Intensity, multiatlas_Labels diff --git a/CPAC/surface/tests/test_installation.py b/CPAC/surface/tests/test_installation.py index 32a1566d19..75f6c58e93 100644 --- a/CPAC/surface/tests/test_installation.py +++ b/CPAC/surface/tests/test_installation.py @@ -26,7 +26,7 @@ @pytest.mark.skipif( "FREESURFER_HOME" not in os.environ or not os.path.exists(os.environ["FREESURFER_HOME"]), - reason="We don't need these dependencies if we don't" "have FreeSurfer.", + reason="We don't need these dependencies if we don't have FreeSurfer.", ) def test_executable(executable): """Make sure executable is installed.""" diff --git a/CPAC/timeseries/timeseries_analysis.py b/CPAC/timeseries/timeseries_analysis.py index c3b38fe6f6..452c6a9261 100644 --- a/CPAC/timeseries/timeseries_analysis.py +++ b/CPAC/timeseries/timeseries_analysis.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2023 C-PAC Developers +# Copyright (C) 2012-2024 C-PAC Developers # This file is part of C-PAC. @@ -30,6 +30,7 @@ create_spatial_map_dataflow, resample_func_roi, ) +from CPAC.utils.monitoring import FMLOGGER def get_voxel_timeseries(wf_name="voxel_timeseries"): @@ -547,11 +548,12 @@ def gen_roi_timeseries(data_file, template, output_type): img_data.shape[3] if unit_data.shape != img_data.shape[:3]: - raise Exception( + msg = ( "\n\n[!] CPAC says: Invalid Shape Error." "Please check the voxel dimensions. 
" "Data and roi should have the same shape.\n\n" ) + raise Exception(msg) nodes = np.unique(unit_data).tolist() sorted_list = [] @@ -570,7 +572,7 @@ def gen_roi_timeseries(data_file, template, output_type): for n in nodes: if n > 0: node_array = img_data[unit_data == n] - node_str = "node_{0}".format(n) + node_str = f"node_{n}" avg = np.mean(node_array, axis=0) avg = np.round(avg, 6) list1 = [n, *avg.tolist()] @@ -578,7 +580,7 @@ def gen_roi_timeseries(data_file, template, output_type): node_dict[node_str] = avg.tolist() # writing to 1Dfile - + FMLOGGER.info("writing 1D file..") f = open(oneD_file, "w") writer = csv.writer(f, delimiter=",") @@ -593,7 +595,7 @@ def gen_roi_timeseries(data_file, template, output_type): roi_number_str.append("#" + number) for key in new_keys: - value_list.append(str("{0}\n".format(node_dict["node_{0}".format(key)]))) + value_list.append(str("{0}\n".format(node_dict[f"node_{key}"]))) column_list = list(zip(*value_list)) @@ -612,7 +614,7 @@ def gen_roi_timeseries(data_file, template, output_type): # if csv is required """ if output_type[0]: - print("writing csv file..") + FMLOGGER.info("writing csv file..") f = open(csv_file, 'wt') writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_MINIMAL) headers = ['node/volume'] + np.arange(vol).tolist() @@ -623,7 +625,7 @@ def gen_roi_timeseries(data_file, template, output_type): # if npz file is required if output_type[1]: - print("writing npz file..") + FMLOGGER.info("writing npz file..") np.savez(numpy_file, roi_data=value_list, roi_numbers=roi_number_list) out_list.append(numpy_file) @@ -690,7 +692,7 @@ def gen_voxel_timeseries(data_file, template): node_array = node_array.T time_points = node_array.shape[0] for t in range(0, time_points): - string = "vol {0}".format(t) + string = f"vol {t}" vol_dict[string] = node_array[t] f.write(str(np.round(np.mean(node_array[t]), 6))) f.write("\n") @@ -754,7 +756,7 @@ def gen_vertices_timeseries(rh_surface_file, lh_surface_file): mghobj1.load(rh_surface_file) vol = mghobj1.vol (x, y) = vol.shape - # print "rh shape", x, y + # IFLOGGER.info("rh shape %s %s", x, y) np.savetxt(rh_file, vol, delimiter="\t") out_list.append(rh_file) @@ -765,7 +767,7 @@ def gen_vertices_timeseries(rh_surface_file, lh_surface_file): mghobj2.load(lh_surface_file) vol = mghobj2.vol (x, y) = vol.shape - # print "lh shape", x, y + # IFLOGGER.info("lh shape %s %s", x, y) np.savetxt(lh_file, vol, delimiter=",") out_list.append(lh_file) diff --git a/CPAC/unet/__init__.py b/CPAC/unet/__init__.py index 70826416c8..8f4d06879c 100644 --- a/CPAC/unet/__init__.py +++ b/CPAC/unet/__init__.py @@ -17,9 +17,9 @@ from ._torch import torch # this import has to be first to install torch from .dataset import BlockDataset, VolumeDataset from .function import ( - MyParser, estimate_dice, extract_large_comp, + MyParser, predict_volumes, write_nifti, ) diff --git a/CPAC/unet/_torch.py b/CPAC/unet/_torch.py index 6b9ec0ac61..bc34d0049d 100644 --- a/CPAC/unet/_torch.py +++ b/CPAC/unet/_torch.py @@ -39,7 +39,8 @@ def _custom_pip_install(env_var: Optional[str] = None) -> None: """ if env_var is not None: if env_var not in os.environ: - raise FileNotFoundError(f"${env_var}") + msg = f"${env_var}" + raise FileNotFoundError(msg) site.USER_BASE = os.environ["PYTHONUSERBASE"] = os.path.join( os.environ[env_var], ".local" ) diff --git a/CPAC/unet/dataset.py b/CPAC/unet/dataset.py index 74ad9193f9..a831180d29 100755 --- a/CPAC/unet/dataset.py +++ b/CPAC/unet/dataset.py @@ -1,3 +1,19 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# 
This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import sys @@ -7,6 +23,8 @@ from torch.utils import data import nibabel as nib +from CPAC.utils.monitoring import IFLOGGER + class VolumeDataset(data.Dataset): def __init__( @@ -29,6 +47,7 @@ def __init__( self.rimg_dir = rimg_dir self.rimg_files = [rimg_file] else: + IFLOGGER.error("Invalid rimg_in: %s", rimg_in) sys.exit(1) # Corrected Images @@ -47,9 +66,7 @@ def __init__( self.cimg_dir = cimg_dir self.cimg_files = [cimg_file] else: - # print(type(cimg_in)) - # print(type(str(cimg_in))) - # print(str(cimg_in)) + IFLOGGER.error("Invalid cimg_in: %s", cimg_in) sys.exit(1) # Brain Masks @@ -67,6 +84,7 @@ def __init__( self.bmsk_dir = bmsk_dir self.bmsk_files = [bmsk_file] else: + IFLOGGER.error("Invalid bmsk_in: %s", bmsk_in) sys.exit(1) self.cur_rimg_nii = None @@ -90,11 +108,11 @@ def __len__(self): def __getitem__(self, index): if self.debug: if isinstance(self.rimg_files, list): - pass + IFLOGGER.debug(self.rimg_files[index]) if isinstance(self.cimg_files, list): - pass + IFLOGGER.debug(self.cimg_files[index]) if isinstance(self.bmsk_files, list): - pass + IFLOGGER.debug(self.bmsk_files[index]) Out = [] if isinstance(self.rimg_files, list): @@ -144,6 +162,7 @@ def __init__(self, rimg=None, bfld=None, bmsk=None, num_slice=3, rescale_dim=256 super(BlockDataset, self).__init__() if isinstance(bmsk, torch.Tensor) and rimg.shape != bmsk.shape: + IFLOGGER.error("Invalid shape of image %s", rimg.shape) return raw_shape = rimg.data[0].shape max_dim = torch.tensor(raw_shape).max() diff --git a/CPAC/unet/function.py b/CPAC/unet/function.py index f291915170..6e7658835f 100644 --- a/CPAC/unet/function.py +++ b/CPAC/unet/function.py @@ -1,5 +1,23 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
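Looking back at the gen_roi_timeseries() hunks in CPAC/timeseries/timeseries_analysis.py, the core step masks a 4D array with a 3D label image (img_data[unit_data == n]) and averages over voxels to get one timeseries per ROI. Below is a minimal sketch of that masking-and-averaging step on synthetic arrays; the shapes, the random data, and the two-label atlas are assumptions made purely for illustration.

import numpy as np

# Synthetic stand-ins: a 4D "BOLD" array (x, y, z, t) and a 3D integer atlas with
# labels 0 (background), 1, and 2. Shapes are illustrative only.
rng = np.random.default_rng(0)
img_data = rng.normal(size=(4, 4, 4, 10))
unit_data = np.zeros((4, 4, 4), dtype=int)
unit_data[:2] = 1
unit_data[2:] = 2

roi_means = {}
for n in np.unique(unit_data):
    if n > 0:
        node_array = img_data[unit_data == n]   # shape (n_voxels_in_roi, t)
        roi_means[f"node_{n}"] = np.round(np.mean(node_array, axis=0), 6)

for key, series in roi_means.items():
    print(key, series.shape)   # each ROI yields one timeseries of length t (10 here)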
from click import BadParameter +from CPAC.utils.monitoring import IFLOGGER + class MyParser(BadParameter): def error(self, message): @@ -111,6 +129,7 @@ def predict_volumes( NoneType = type(None) if isinstance(rimg_in, NoneType) and isinstance(cimg_in, NoneType): + IFLOGGER.error("Input rimg_in or cimg_in") sys.exit(1) if save_dice: @@ -155,6 +174,7 @@ def predict_volumes( rescale_dim=rescale_dim, ) else: + IFLOGGER.error("Invalid Volume Dataset!") sys.exit(2) rescale_shape = block_dataset.get_rescale_shape() @@ -220,7 +240,7 @@ def predict_volumes( bmsk = bmsk.data[0].numpy() dice = estimate_dice(bmsk, pr_bmsk_final) if verbose: - pass + IFLOGGER.info(dice) t1w_nii = volume_dataset.getCurCimgNii() t1w_path = t1w_nii.get_filename() diff --git a/CPAC/utils/__init__.py b/CPAC/utils/__init__.py index 9ff85f2446..6338d7deac 100644 --- a/CPAC/utils/__init__.py +++ b/CPAC/utils/__init__.py @@ -1,5 +1,46 @@ -from .configuration import Configuration, check_pname, set_subject +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""General utilities for C-PAC.""" +from . import build_data_config, create_fsl_flame_preset, versioning +from .configuration import check_pname, Configuration, set_subject from .datatypes import ListFromItem from .interfaces import function +from .sklearn import check_random_state +from .utils import ( + correlation, + find_files, + get_zscore, + repickle, + safe_shape, +) -__all__ = ["check_pname", "Configuration", "function", "ListFromItem", "set_subject"] +__all__ = [ + "build_data_config", + "check_pname", + "check_random_state", + "Configuration", + "correlation", + "create_fsl_flame_preset", + "find_files", + "function", + "get_zscore", + "ListFromItem", + "repickle", + "safe_shape", + "set_subject", + "versioning", +] diff --git a/CPAC/utils/bids_utils.py b/CPAC/utils/bids_utils.py index 844a4cd6e8..6058542a17 100755 --- a/CPAC/utils/bids_utils.py +++ b/CPAC/utils/bids_utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2023 C-PAC Developers +# Copyright (C) 2016-2024 C-PAC Developers # This file is part of C-PAC. 
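Several hunks in this range swap silent pass statements and bare print() calls for shared module-level loggers (IFLOGGER, FMLOGGER, and the UTLOGGER imported in the next hunk). The sketch below mirrors that pattern using only the standard library, since the CPAC.utils.monitoring loggers are not reproduced here; the logger name, the toy decode_fname parser, and its messages are illustrative assumptions.

import logging

# Stand-in for a shared module-level logger such as IFLOGGER/UTLOGGER;
# the name is illustrative, not the one C-PAC uses.
logging.basicConfig(format="%(message)s", level=logging.DEBUG)
LOGGER = logging.getLogger("example.workflow")

def decode_fname(fname: str, dbg: bool = False) -> dict:
    """Parse 'key-value' entities out of a BIDS-style filename (toy version)."""
    if dbg:
        LOGGER.debug("parsing %s", fname)      # instead of a silent `pass`
    parts = fname.split("_")
    f_dict = dict(p.split("-", 1) for p in parts if "-" in p)
    if "sub" not in f_dict:
        LOGGER.warning("no subject entity found in %s", fname)  # instead of print()
    return f_dict

print(decode_fname("sub-01_ses-1_task-rest_bold.nii.gz", dbg=True))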
@@ -20,8 +20,11 @@ import sys from warnings import warn +from botocore.exceptions import BotoCoreError import yaml +from CPAC.utils.monitoring import UTLOGGER + def bids_decode_fname(file_path, dbg=False, raise_error=True): f_dict = {} @@ -30,33 +33,39 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): # first lets make sure that we know how to handle the file if "nii" not in fname.lower() and "json" not in fname.lower(): - raise IOError( - "File (%s) does not appear to be" % fname + "a nifti or json file" - ) + msg = f"File ({fname}) does not appear to be a nifti or json file" + raise IOError(msg) if dbg: - pass + UTLOGGER.debug("parsing %s", file_path) # first figure out if there is a site directory level, this isn't # specified in BIDS currently, but hopefully will be in the future file_path_vals = os.path.dirname(file_path).split("/") sub = [s for s in file_path_vals if "sub-" in s] if dbg: - pass + UTLOGGER.debug("found subject %s in %s", sub, file_path_vals) if len(sub) > 1: - pass + UTLOGGER.debug( + "Odd that there is more than one subject directory in (%s), does the" + " filename conform to BIDS format?", + file_path, + ) if sub: sub_ndx = file_path_vals.index(sub[0]) if sub_ndx > 0 and file_path_vals[sub_ndx - 1]: if dbg: - pass + UTLOGGER.debug("setting site to %s", file_path_vals[sub_ndx - 1]) f_dict["site"] = file_path_vals[sub_ndx - 1] else: f_dict["site"] = "none" elif file_path_vals[-1]: if dbg: - pass + UTLOGGER.debug( + "looking for subject id didn't pan out settling for last subdir %s", + file_path_vals[-1], + ) f_dict["site"] = file_path_vals[-1] else: f_dict["site"] = "none" @@ -77,33 +86,32 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): if "scantype" not in f_dict: msg = ( - "Filename ({0}) does not appear to contain" - " scan type, does it conform to the BIDS format?".format(fname) + f"Filename ({fname}) does not appear to contain" + " scan type, does it conform to the BIDS format?" ) if raise_error: raise ValueError(msg) else: - pass + UTLOGGER.error(msg) elif not f_dict["scantype"]: msg = ( - "Filename ({0}) does not appear to contain" - " scan type, does it conform to the BIDS format?".format(fname) + f"Filename ({fname}) does not appear to contain" + " scan type, does it conform to the BIDS format?" ) if raise_error: raise ValueError(msg) else: - pass + UTLOGGER.error(msg) else: if "bold" in f_dict["scantype"] and not f_dict["task"]: msg = ( - "Filename ({0}) is a BOLD file, but " - "doesn't contain a task, does it conform to the" - " BIDS format?".format(fname) + f"Filename ({fname}) is a BOLD file, but doesn't contain a task, does" + " it conform to the BIDS format?" 
) if raise_error: raise ValueError(msg) else: - pass + UTLOGGER.error(msg) return f_dict @@ -275,14 +283,16 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): key = "-".join([level, "none"]) if dbg: - pass + UTLOGGER.debug(key) # if the key doesn't exist in the config dictionary, check to see if # the generic key exists and return that if key in t_dict: t_dict = t_dict[key] else: if dbg: - pass + UTLOGGER.debug( + "Couldn't find %s, so going with %s", key, "-".join([level, "none"]) + ) key = "-".join([level, "none"]) if key in t_dict: t_dict = t_dict[key] @@ -293,7 +303,7 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): # sidecar files if dbg: - pass + UTLOGGER.debug(t_dict) for key in t_dict.keys(): if "RepetitionTime" in key: @@ -334,7 +344,7 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): t_dict = t_dict[key] if dbg: - pass + UTLOGGER.debug(bids_config_dict) # get the paths to the json yaml files in config_dict, the paths contain # the information needed to map the parameters from the jsons (the vals @@ -345,11 +355,11 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): config_paths = sorted(config_dict.keys(), key=lambda p: len(p.split("/"))) if dbg: - pass + UTLOGGER.debug(config_paths) for cp in config_paths: if dbg: - pass + UTLOGGER.debug("processing %s", cp) # decode the filepath into its various components as defined by BIDS f_dict = bids_decode_fname(cp, raise_error=raise_error) @@ -503,7 +513,7 @@ def gen_bids_outputs_sublist(base_path, paths_list, key_list, creds_path): if run_info not in subjdict[subj_info]["funcs"]: subjdict[subj_info]["funcs"][run_info] = {"run_info": run_info} if resource in subjdict[subj_info]["funcs"][run_info]: - pass + UTLOGGER.warning("resource %s already exists in subjdict ??", resource) subjdict[subj_info]["funcs"][run_info][resource] = p else: subjdict[subj_info][resource] = p @@ -513,6 +523,7 @@ def gen_bids_outputs_sublist(base_path, paths_list, key_list, creds_path): missing = 0 for tkey in top_keys: if tkey not in subj_res: + UTLOGGER.warning("%s not found for %s", tkey, subj_info) missing += 1 break @@ -520,9 +531,13 @@ def gen_bids_outputs_sublist(base_path, paths_list, key_list, creds_path): for func_key, func_res in subj_res["funcs"].items(): for bkey in bot_keys: if bkey not in func_res: + UTLOGGER.warning("%s not found for %s", bkey, func_key) missing += 1 break if missing == 0: + UTLOGGER.info( + "adding: %s, %s, %d", subj_info, func_key, len(sublist) + ) tdict = copy.deepcopy(subj_res) del tdict["funcs"] tdict.update(func_res) @@ -584,7 +599,14 @@ def bids_gen_cpac_sublist( to be processed """ if dbg: - pass + UTLOGGER.debug( + "gen_bids_sublist called with:\n bids_dir: %s\n # paths: %s" + "\n config_dict: %s\n creds_path: %s", + bids_dir, + len(paths_list), + "missing" if not config_dict else "found", + creds_path, + ) # if configuration information is not desired, config_dict will be empty, # otherwise parse the information in the sidecar json files into a dict @@ -615,7 +637,9 @@ def bids_gen_cpac_sublist( if config_dict: t_params = bids_retrieve_params(bids_config_dict, f_dict) if not t_params: - pass + UTLOGGER.warning( + "Did not receive any parameters for %s, is this a problem?", p + ) task_info = { "scan": os.path.join(bids_dir, p), @@ -652,7 +676,15 @@ def bids_gen_cpac_sublist( "lesion_mask" ] = task_info["scan"] else: - pass + UTLOGGER.warning( + "Lesion mask file (%s) already found for (%s:%s)" + " discarding %s", + 
subdict[f_dict["sub"]][f_dict["ses"]]["lesion_mask"], + f_dict["sub"], + f_dict["ses"], + p, + ) + # TODO deal with scan parameters anatomical if "anat" not in subdict[f_dict["sub"]][f_dict["ses"]]: subdict[f_dict["sub"]][f_dict["ses"]]["anat"] = {} @@ -689,7 +721,14 @@ def bids_gen_cpac_sublist( subdict[f_dict["sub"]][f_dict["ses"]]["func"][task_key] = task_info else: - pass + UTLOGGER.warning( + "Func file (%s) already found for (%s: %s: %s) discarding %s", + subdict[f_dict["sub"]][f_dict["ses"]]["func"][task_key], + f_dict["sub"], + f_dict["ses"], + task_key, + p, + ) if "phase" in f_dict["scantype"]: if "fmap" not in subdict[f_dict["sub"]][f_dict["ses"]]: @@ -720,11 +759,11 @@ def bids_gen_cpac_sublist( if "fmap" not in subdict[f_dict["sub"]][f_dict["ses"]]: subdict[f_dict["sub"]][f_dict["ses"]]["fmap"] = {} if ( - "epi_{0}".format(pe_dir) + f"epi_{pe_dir}" not in subdict[f_dict["sub"]][f_dict["ses"]]["fmap"] ): subdict[f_dict["sub"]][f_dict["ses"]]["fmap"][ - "epi_{0}".format(pe_dir) + f"epi_{pe_dir}" ] = task_info sublist = [] @@ -734,9 +773,19 @@ def bids_gen_cpac_sublist( sublist.append(ses) else: if "anat" not in ses: - pass + UTLOGGER.warning( + "%s %s %s is missing an anat", + ses["site_id"] if "none" not in ses["site_id"] else "", + ses["subject_id"], + ses["unique_id"], + ) if "func" not in ses: - pass + UTLOGGER.warning( + "%s %s %s is missing a func", + ses["site_id"] if "none" not in ses["site_id"] else "", + ses["subject_id"], + ses["unique_id"], + ) return sublist @@ -777,6 +826,8 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): bucket = fetch_creds.return_bucket(aws_input_creds, bucket_name) + UTLOGGER.info("gathering files from S3 bucket (%s) for %s", bucket, prefix) + for s3_obj in bucket.objects.filter(Prefix=prefix): for suf in suffixes: if suf in str(s3_obj.key): @@ -787,8 +838,12 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): config_dict[ s3_obj.key.replace(prefix, "").lstrip("/") ] = json.loads(s3_obj.get()["Body"].read()) - except Exception: - raise + except Exception as e: + msg = ( + f"Error retrieving {s3_obj.key.replace(prefix, '')}" + f" ({e.message})" + ) + raise BotoCoreError(msg) from e elif "nii" in str(s3_obj.key): file_paths.append( str(s3_obj.key).replace(prefix, "").lstrip("/") @@ -815,17 +870,16 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): } ) except UnicodeDecodeError: - raise Exception( - "Could not decode {0}".format(os.path.join(root, f)) - ) + msg = f"Could not decode {os.path.join(root, f)}" + raise UnicodeDecodeError(msg) if not file_paths and not config_dict: - raise IOError( - "Didn't find any files in {0}. Please verify that the " - "path is typed correctly, that you have read access to " - "the directory, and that it is not " - "empty.".format(bids_dir) + msg = ( + f"Didn't find any files in {bids_dir}. Please verify that the path is" + " typed correctly, that you have read access to the directory, and that it" + " is not empty." ) + raise IOError(msg) return file_paths, config_dict @@ -937,7 +991,8 @@ def load_yaml_config(config_filename, aws_input_creds): config_content = b64decode(encoded) return yaml.safe_load(config_content) except: - raise + msg = f"Error! 
Could not find load config from data URI {config_filename}" + raise BotoCoreError(msg) if config_filename.lower().startswith("s3://"): # s3 paths begin with s3://bucket/ @@ -961,7 +1016,8 @@ def load_yaml_config(config_filename, aws_input_creds): try: return yaml.safe_load(open(config_filename, "r")) except IOError: - raise + msg = f"Error! Could not find config file {config_filename}" + raise FileNotFoundError(msg) def cl_strip_brackets(arg_list): @@ -1019,6 +1075,8 @@ def create_cpac_data_config( ------- list """ + UTLOGGER.info("Parsing %s..", bids_dir) + (file_paths, config) = collect_bids_files_configs(bids_dir, aws_input_creds) if participant_labels and file_paths: @@ -1032,6 +1090,7 @@ def create_cpac_data_config( ] if not file_paths: + UTLOGGER.error("Did not find data for %s", ", ".join(participant_labels)) sys.exit(1) raise_error = not skip_bids_validator @@ -1046,6 +1105,7 @@ def create_cpac_data_config( ) if not sub_list: + UTLOGGER.error("Did not find data in %s", bids_dir) sys.exit(1) return sub_list @@ -1083,6 +1143,13 @@ def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds) ] if not sub_list: + UTLOGGER.error( + "Did not find data for %s in %s", + ", ".join(participant_labels), + data_config_file + if not data_config_file.startswith("data:") + else "data URI", + ) sys.exit(1) return sub_list diff --git a/CPAC/utils/build_data_config.py b/CPAC/utils/build_data_config.py index 8b4ebc21e8..f74af8b869 100644 --- a/CPAC/utils/build_data_config.py +++ b/CPAC/utils/build_data_config.py @@ -1,4 +1,41 @@ -def gather_file_paths(base_directory, verbose=False): +# Copyright (C) 2017-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Build a C-PAC data configuration.""" +from logging import basicConfig, INFO +from typing import Any + +from CPAC.utils.monitoring.custom_logging import getLogger +from CPAC.utils.typing import DICT, LIST, PATHSTR + +logger = getLogger("CPAC.utils.data-config") +basicConfig(format="%(message)s", level=INFO) + + +def _cannot_write(file_name: PATHSTR) -> None: + """Raise an IOError when a file cannot be written to disk.""" + msg = ( + f"\n\nCPAC says: I couldn't save this file to your drive:\n{file_name}\n\nMake" + " sure you have write access? Then come back. Don't worry.. I'll wait.\n\n" + ) + raise IOError(msg) + + +def gather_file_paths(base_directory, verbose=False) -> LIST[PATHSTR]: + """Return a list of file paths from a base directory.""" # this will go into core tools eventually # ideas: return number of paths, optionally instead @@ -19,15 +56,33 @@ def gather_file_paths(base_directory, verbose=False): path_list.append(fullpath) if verbose: - pass + logger.info("Number of paths: %s", len(path_list)) return path_list -def pull_s3_sublist(data_folder, creds_path=None, keep_prefix=True): - """Return a list of input data file paths that are available on an AWS S3 - bucket on the cloud. 
- """ +def _no_anatomical_found( + data_dct: dict, + verbose: bool, + purpose: str, + entity: str, + _id: str, + file_path: PATHSTR, +) -> dict: + """Return the data dictionary and warn no anatomical entries are found.""" + if verbose: + logger.warning( + "No anatomical entries found for %s for %s %s:\n%s\n", + purpose, + entity, + _id, + file_path, + ) + return data_dct + + +def pull_s3_sublist(data_folder, creds_path=None, keep_prefix=True) -> LIST[PATHSTR]: + """Return a list of input data file paths that are available from AWS S3.""" import os from indi_aws import fetch_creds @@ -39,6 +94,8 @@ def pull_s3_sublist(data_folder, creds_path=None, keep_prefix=True): bucket_name = s3_path.split("/")[0] bucket_prefix = s3_path.split(bucket_name + "/")[1] + logger.info("Pulling from %s ...", data_folder) + s3_list = [] bucket = fetch_creds.return_bucket(creds_path, bucket_name) @@ -56,21 +113,24 @@ def pull_s3_sublist(data_folder, creds_path=None, keep_prefix=True): else: s3_list.append(str(bk.key).replace(bucket_prefix, "")) + logger.info("Finished pulling from S3. %s file paths found.", len(s3_list)) + if not s3_list: err = ( "\n\n[!] No input data found matching your data settings in " - "the AWS S3 bucket provided:\n{0}\n\n".format(data_folder) + f"the AWS S3 bucket provided:\n{data_folder}\n\n" ) - raise Exception(err) + raise FileNotFoundError(err) return s3_list def get_file_list( base_directory, creds_path=None, write_txt=None, write_pkl=None, write_info=False -): - """Return a list of input and data file paths either stored locally or on - an AWS S3 bucket on the cloud. +) -> LIST[PATHSTR]: + """Return a list of input and data file paths. + + These paths are either stored locally or on an AWS S3 bucket on the cloud. """ import os @@ -85,26 +145,28 @@ def get_file_list( if len(file_list) == 0: warn = ( "\n\n[!] No files were found in the base directory you " - "provided.\n\nDirectory: {0}\n\n".format(base_directory) + f"provided.\n\nDirectory: {base_directory}\n\n" ) - raise Exception(warn) + raise FileNotFoundError(warn) if write_txt: if ".txt" not in write_txt: - write_txt = "{0}.txt".format(write_txt) + write_txt = f"{write_txt}.txt" write_txt = os.path.abspath(write_txt) with open(write_txt, "wt") as f: for path in file_list: - f.write("{0}\n".format(path)) + f.write(f"{path}\n") + logger.info("\nFilepath list text file written:\n%s", write_txt) if write_pkl: import pickle if ".pkl" not in write_pkl: - write_pkl = "{0}.pkl".format(write_pkl) + write_pkl = f"{write_pkl}.pkl" write_pkl = os.path.abspath(write_pkl) with open(write_pkl, "wb") as f: pickle.dump(list(file_list), f) + logger.info("\nFilepath list pickle file written:\n%s", write_pkl) if write_info: niftis = [] @@ -127,10 +189,21 @@ def get_file_list( if "participants.tsv" in path: part_tsvs.append(path) + logger.info( + "\nBase directory: %s\nFile paths found: %s\n..NIFTI files: %s\n..JSON files: %s", + base_directory, + len(file_list), + len(niftis), + len(jsons), + ) + if jsons: - pass + logger.info( + "....%s of which are scan parameter JSON files", len(scan_jsons) + ) + logger.info("..CSV files: %s\n..TSV files: %s", len(csvs), len(tsvs)) if tsvs: - pass + logger.info("....%s of which are participants.tsv files", len(part_tsvs)) return file_list @@ -159,7 +232,7 @@ def download_single_s3_path( s3_prefix = s3_path.replace("s3://", "") else: err = "[!] S3 file paths must be pre-pended with the 's3://' prefix." 
- raise Exception(err) + raise SyntaxError(err) bucket_name = s3_prefix.split("/")[0] data_dir = s3_path.split(bucket_name + "/")[1] @@ -174,76 +247,43 @@ def download_single_s3_path( if os.path.isfile(local_dl): if overwrite: + logger.info("\nS3 bucket file already downloaded! Overwriting..") aws_utils.s3_download(bucket, ([data_dir], [local_dl])) else: - pass + logger.info( + "\nS3 bucket file already downloaded! Skipping download.\nS3 file: %s\nLocal file already exists: %s\n", + s3_path, + local_dl, + ) else: aws_utils.s3_download(bucket, ([data_dir], [local_dl])) return local_dl -def pull_s3_sublist(data_folder, creds_path=None, keep_prefix=True): - import os - - from indi_aws import fetch_creds - - if creds_path: - creds_path = os.path.abspath(creds_path) - - s3_path = data_folder.split("s3://")[1] - bucket_name = s3_path.split("/")[0] - bucket_prefix = s3_path.split(bucket_name + "/")[1] - - s3_list = [] - bucket = fetch_creds.return_bucket(creds_path, bucket_name) - - # ensure slash at end of bucket_prefix, so that if the final - # directory name is a substring in other directory names, these - # other directories will not be pulled into the file list - if "/" not in bucket_prefix[-1]: - bucket_prefix += "/" - - # Build S3-subjects to download - for bk in bucket.objects.filter(Prefix=bucket_prefix): - if keep_prefix: - fullpath = os.path.join("s3://", bucket_name, str(bk.key)) - s3_list.append(fullpath) - else: - s3_list.append(str(bk.key).replace(bucket_prefix, "")) - - if not s3_list: - err = ( - "\n\n[!] No input data found matching your data settings in " - "the AWS S3 bucket provided:\n{0}\n\n".format(data_folder) - ) - raise Exception(err) - - return s3_list - - def generate_group_analysis_files(data_config_outdir, data_config_name): """Create the group-level analysis inclusion list.""" import csv import os - from sets import Set import yaml data_config_path = os.path.join(data_config_outdir, data_config_name) try: subjects_list = yaml.safe_load(open(data_config_path, "r")) - except: - "\n\n[!] Data configuration file couldn't be read!\nFile " "path: {0}\n".format( - data_config_path + except (OSError, TypeError) as e: + msg = ( + "\n\n[!] Data configuration file couldn't be read!\nFile path:" + f"{data_config_path}\n" ) + raise OSError(msg) from e - subject_scan_set = Set() - subID_set = Set() - session_set = Set() - subject_set = Set() - scan_set = Set() + subject_scan_set = set() + subID_set = set() + session_set = set() + subject_set = set() + scan_set = set() data_list = [] try: @@ -276,9 +316,14 @@ def generate_group_analysis_files(data_config_outdir, data_config_name): subject_set.add(subject_id) except TypeError: + logger.error( + "Subject list could not be populated!\nThis is most likely due to a" + " mis-formatting in your inclusion and/or exclusion subjects txt file or" + " your anatomical and/or functional path templates." 
+ ) err_str = ( - "Check formatting of your anatomical/functional path " - "templates and inclusion/exclusion subjects text files" + "Check formatting of your anatomical/functional path templates and" + " inclusion/exclusion subjects text files" ) raise TypeError(err_str) @@ -306,8 +351,8 @@ def generate_group_analysis_files(data_config_outdir, data_config_name): try: f = open(file_name, "wb") - except: - raise IOError + except (OSError, TypeError): + _cannot_write(file_name) writer = csv.writer(f) @@ -317,6 +362,8 @@ def generate_group_analysis_files(data_config_outdir, data_config_name): f.close() + logger.info("Template phenotypic file for group analysis - %s", file_name) + # generate the group analysis subject lists file_name = os.path.join( data_config_outdir, "participant_list_group_analysis_%s.txt" % data_config_name @@ -325,15 +372,20 @@ def generate_group_analysis_files(data_config_outdir, data_config_name): try: with open(file_name, "w") as f: for sub in sorted(subID_set): - print(sub, file=f) - except: - raise IOError + f.write(f"{sub}\n") + except (AttributeError, OSError, TypeError, ValueError): + _cannot_write(file_name) + + logger.info( + "Participant list required later for group analysis - %s\n\n", file_name + ) -def extract_scan_params_csv(scan_params_csv): +def extract_scan_params_csv(scan_params_csv: PATHSTR) -> DICT[str, Any]: """ - Function to extract the site-based scan parameters from a csv file - and return a dictionary of their values. + Extract the site-based scan parameters from a csv file. + + Returns a dictionary of their values. Parameters ---------- @@ -442,8 +494,10 @@ def extract_scan_params_csv(scan_params_csv): def format_incl_excl_dct(incl_list, info_type="participants"): - """Create either an inclusion or exclusion dictionary to determine which - input files to include or not include in the data configuration file. + """Create either an inclusion or exclusion dictionary... + + ...to determine which input files to include or not include in the data + configuration file. """ incl_dct = {} @@ -477,7 +531,9 @@ def get_BIDS_data_dct( exclusion_dct=None, config_dir=None, ): - """Return a data dictionary mapping input file paths to participant, + """Return a data dictionary... + + ...mapping input file paths to participant, session, scan, and site IDs (where applicable) for a BIDS-formatted data directory. 
@@ -518,7 +574,7 @@ def get_BIDS_data_dct( ) func = os.path.join( bids_base_dir, - "sub-{participant}/func/sub-{participant}_task-" "{scan}_bold.nii.gz", + "sub-{participant}/func/sub-{participant}_task-{scan}_bold.nii.gz", ) fmap_phase_sess = os.path.join( @@ -528,7 +584,7 @@ def get_BIDS_data_dct( "diff.nii.gz", ) fmap_phase = os.path.join( - bids_base_dir, "sub-{participant}/fmap/sub-{participant}" "*phasediff.nii.gz" + bids_base_dir, "sub-{participant}/fmap/sub-{participant}*phasediff.nii.gz" ) fmap_mag_sess = os.path.join( @@ -539,7 +595,7 @@ def get_BIDS_data_dct( ) fmap_mag = os.path.join( - bids_base_dir, "sub-{participant}/fmap/sub-{participant}" "*magnitud*.nii.gz" + bids_base_dir, "sub-{participant}/fmap/sub-{participant}*magnitud*.nii.gz" ) fmap_pedir_sess = os.path.join( @@ -550,20 +606,18 @@ def get_BIDS_data_dct( ) fmap_pedir = os.path.join( - bids_base_dir, "sub-{participant}/fmap/sub-{participant}" "*acq-fMR*_epi.nii.gz" + bids_base_dir, "sub-{participant}/fmap/sub-{participant}*acq-fMR*_epi.nii.gz" ) sess_glob = os.path.join(bids_base_dir, "sub-*/ses-*/*") fmap_phase_scan_glob = os.path.join( - bids_base_dir, "sub-*fmap/" "sub-*phasediff.nii.gz" + bids_base_dir, "sub-*fmap/sub-*phasediff.nii.gz" ) - fmap_mag_scan_glob = os.path.join( - bids_base_dir, "sub-*fmap/" "sub-*magnitud*.nii.gz" - ) + fmap_mag_scan_glob = os.path.join(bids_base_dir, "sub-*fmap/sub-*magnitud*.nii.gz") - os.path.join(bids_base_dir, "sub-*fmap/" "sub-*_*acq-fMR*_epi.nii.gz") + os.path.join(bids_base_dir, "sub-*fmap/sub-*_*acq-fMR*_epi.nii.gz") part_tsv_glob = os.path.join(bids_base_dir, "*participants.tsv") @@ -627,7 +681,7 @@ def get_BIDS_data_dct( ) fmap_phase = os.path.join( bids_base_dir, - "sub-{participant}/fmap/sub-{participant}" "*phasediff.nii.gz", + "sub-{participant}/fmap/sub-{participant}*phasediff.nii.gz", ) if fnmatch.fnmatch(filepath, fmap_mag_scan_glob): @@ -639,7 +693,7 @@ def get_BIDS_data_dct( ) fmap_mag = os.path.join( bids_base_dir, - "sub-{participant}/fmap/sub-{participant}" "*magnitud*.nii.gz", + "sub-{participant}/fmap/sub-{participant}*magnitud*.nii.gz", ) """ @@ -698,10 +752,18 @@ def get_BIDS_data_dct( import csv if part_tsv.startswith("s3://"): + logger.info( + "\n\nFound a participants.tsv file in your BIDS data set on the S3" + " bucket. Downloading..\n" + ) part_tsv = download_single_s3_path( part_tsv, config_dir, aws_creds_path, overwrite=True ) + logger.info( + "Checking participants.tsv file for site information:\n%s", part_tsv + ) + with open(part_tsv, "r") as f: tsv = csv.DictReader(f) for row in tsv: @@ -715,6 +777,7 @@ def get_BIDS_data_dct( if sites_dct: # check for duplicates sites = sites_dct.keys() + logger.info("%s sites found in the participant.tsv file.", len(sites)) for site in sites: for other_site in sites: if site == other_site: @@ -722,22 +785,19 @@ def get_BIDS_data_dct( dups = set(sites_dct[site]) & set(sites_dct[other_site]) if dups: err = ( - "\n\n[!] There are duplicate participant IDs " - "in different sites, as defined by your " - "participants.tsv file! Consider pre-fixing " - "the participant IDs with the site names.\n\n" - "Duplicates:\n" - "Sites: {0}, {1}\n" - "Duplicate IDs: {2}" - "\n\n".format(site, other_site, str(dups)) + "\n\n[!] There are duplicate participant IDs in different" + " sites, as defined by your participants.tsv file!" 
+ " Consider prefixing the participant IDs with the site" + f" names.\n\nDuplicates:\nSites: {site}, {other_site}\n" + f"Duplicate IDs: {dups!s}\n\n" ) - raise Exception(err) + raise LookupError(err) # now invert for sub in sites_dct[site]: sites_subs_dct[sub] = site else: - pass + logger.warning("No site information found in the participants.tsv file.") if not sites_subs_dct: # if there was no participants.tsv file, (or no site column in the @@ -766,7 +826,7 @@ def get_BIDS_data_dct( # instead of /bids_dir/sub-01/func/sub-01_task-rest_bold.json for json_file in site_jsons: # get site ID - site_id = json_file.replace("{0}/".format(bids_base_dir), "").split("/")[0] + site_id = json_file.replace(f"{bids_base_dir}/", "").split("/")[0] if "site-" in site_id: site_id = site_id.replace("site-", "") @@ -780,17 +840,17 @@ def get_BIDS_data_dct( run_id = None acq_id = None - for id in ids: - if "sub-" in id: - sub_id = id.replace("sub-", "") - if "ses-" in id: - ses_id = id.replace("ses-", "") - if "task-" in id: - scan_id = id.replace("task-", "") - if "run-" in id: - run_id = id.replace("run-", "") - if "acq-" in id: - acq_id = id.replace("acq-", "") + for _id in ids: + if "sub-" in _id: + sub_id = _id.replace("sub-", "") + if "ses-" in _id: + ses_id = _id.replace("ses-", "") + if "task-" in _id: + scan_id = _id.replace("task-", "") + if "run-" in _id: + run_id = _id.replace("run-", "") + if "acq-" in _id: + acq_id = _id.replace("acq-", "") if run_id or acq_id: json_filename = os.path.basename(json_file) @@ -806,9 +866,9 @@ def get_BIDS_data_dct( # {All}_run-1, for example, to be interpreted later when # matching scan params JSONs to each func scan scan_id = "[All]" - for id in json_filename.split("_"): - if "run-" in id or "acq-" in id: - scan_id = "{0}_{1}".format(scan_id, id) + for additional_id in json_filename.split("_"): + if "run-" in additional_id or "acq-" in additional_id: + scan_id = f"{scan_id}_{additional_id}" if site_id not in scan_params_dct.keys(): scan_params_dct[site_id] = {} @@ -834,17 +894,17 @@ def get_BIDS_data_dct( run_id = None acq_id = None - for id in ids: - if "sub-" in id: - sub_id = id.replace("sub-", "") - if "ses-" in id: - ses_id = id.replace("ses-", "") - if "task-" in id: - scan_id = id.replace("task-", "") - if "run-" in id: - run_id = id.replace("run-", "") - if "acq-" in id: - acq_id = id.replace("acq-", "") + for _id in ids: + if "sub-" in _id: + sub_id = _id.replace("sub-", "") + if "ses-" in _id: + ses_id = _id.replace("ses-", "") + if "task-" in _id: + scan_id = _id.replace("task-", "") + if "run-" in _id: + run_id = _id.replace("run-", "") + if "acq-" in _id: + acq_id = _id.replace("acq-", "") if run_id or acq_id: json_filename = os.path.basename(json_file) @@ -860,9 +920,9 @@ def get_BIDS_data_dct( # {All}_run-1, for example, to be interpreted later when # matching scan params JSONs to each func scan scan_id = "[All]" - for id in json_filename.split("_"): - if "run-" in id or "acq-" in id: - scan_id = "{0}_{1}".format(scan_id, id) + for additional_id in json_filename.split("_"): + if "run-" in additional_id or "acq-" in additional_id: + scan_id = f"{scan_id}_{additional_id}" if site_id not in scan_params_dct.keys(): scan_params_dct[site_id] = {} @@ -916,8 +976,10 @@ def get_BIDS_data_dct( def find_unique_scan_params(scan_params_dct, site_id, sub_id, ses_id, scan_id): - """Return the scan parameters information stored in the provided scan - parameters dictionary for the IDs of a specific functional input scan. 
+ """Return the scan parameters information... + + ...stored in the provided scan parameters dictionary for the IDs of a specific + functional input scan. """ scan_params = None @@ -925,7 +987,8 @@ def find_unique_scan_params(scan_params_dct, site_id, sub_id, ses_id, scan_id): site_id = "All" try: scan_params_dct[site_id] = {} - except: + except TypeError: + logger.info("%s", scan_params_dct) scan_params_dct = {site_id: {}} if sub_id not in scan_params_dct[site_id]: sub_id = "All" @@ -950,21 +1013,26 @@ def find_unique_scan_params(scan_params_dct, site_id, sub_id, ses_id, scan_id): try: scan_params = scan_params_dct[site_id][sub_id][ses_id][scan_id] - except TypeError: + except TypeError as type_error: # this ideally should never fire err = ( - "\n[!] The scan parameters dictionary supplied to the data " - "configuration builder is not in the proper format.\n\n The " - "key combination that caused this error:\n{0}, {1}, {2}, {3}" - "\n\n".format(site_id, sub_id, ses_id, scan_id) + "\n[!] The scan parameters dictionary supplied to the data configuration" + " builder is not in the proper format.\n\n The key combination that caused" + f" this error:\n{site_id}, {sub_id}, {ses_id}, {scan_id}\n\n" ) - raise Exception(err) + raise SyntaxError(err) from type_error except KeyError: pass if not scan_params: - "\n[!] No scan parameter information found in your scan " "parameter configuration for the functional input file:\n" "site: {0}, participant: {1}, session: {2}, series: {3}\n\n" "".format( - site_id, sub_id, ses_id, scan_id + logger.warning( + "\n[!] No scan parameter information found in your scan parameter" + " configuration for the functional input file:\nsite: %s, participant: %s," + " session: %s, series: %s\n\n", + site_id, + sub_id, + ses_id, + scan_id, ) return scan_params @@ -982,9 +1050,10 @@ def update_data_dct( exclusion_dct=None, aws_creds_path=None, verbose=True, -): - """Return a data dictionary with a new file path parsed and added in, - keyed with its appropriate ID labels. +) -> DICT[str, Any]: + """Return a data dictionary with a new file path parsed and added in,... + + ...keyed with its appropriate ID labels. """ import glob import os @@ -998,47 +1067,41 @@ def update_data_dct( return data_dct if data_type == "anat": - # pick the right anatomical scan, if "anatomical_scan" has been - # provided + # pick the right anatomical scan, if "anatomical_scan" has been provided if anat_scan: file_name = os.path.basename(file_path) if anat_scan not in file_name: return data_dct - else: - # if we're dealing with BIDS here - if "sub-" in file_name and "T1w." in file_name: - anat_scan_identifier = False - # BIDS tags are delineated with underscores - bids_tags = [] - for tag in file_name.split("_"): - if anat_scan == tag: - # the "anatomical_scan" substring provided is - # one of the BIDS tags + # if we're dealing with BIDS here + if "sub-" in file_name and "T1w." 
in file_name: + anat_scan_identifier = False + # BIDS tags are delineated with underscores + bids_tags = [] + for tag in file_name.split("_"): + if anat_scan == tag: + # the "anatomical_scan" substring provided is + # one of the BIDS tags + anat_scan_identifier = True + else: + if "sub-" not in tag and "ses-" not in tag and "T1w" not in tag: + bids_tags.append(tag) + if anat_scan in tag: + # the "anatomical_scan" substring provided was + # found in one of the BIDS tags anat_scan_identifier = True - else: - if ( - "sub-" not in tag - and "ses-" not in tag - and "T1w" not in tag - ): - bids_tags.append(tag) - if anat_scan in tag: - # the "anatomical_scan" substring provided was - # found in one of the BIDS tags - anat_scan_identifier = True - if anat_scan_identifier: - if len(bids_tags) > 1: - # if this fires, then there are other tags as well - # in addition to what was defined in the - # "anatomical_scan" field in the data settings, - # for example, we might be looking for only - # run-1, but we found acq-inv_run-1 instead - return data_dct - - # if we're dealing with a custom data directory format - else: - # TODO: more involved processing here? or not necessary? - pass + if anat_scan_identifier: + if len(bids_tags) > 1: + # if this fires, then there are other tags as well + # in addition to what was defined in the + # "anatomical_scan" field in the data settings, + # for example, we might be looking for only + # run-1, but we found acq-inv_run-1 instead + return data_dct + + # if we're dealing with a custom data directory format + else: + # TODO: more involved processing here? or not necessary? + pass # reduce the template down to only the sub-strings that do not have # these tags or IDs @@ -1113,48 +1176,42 @@ def update_data_dct( continue try: - id = new_path.split(part1, 1)[1] - id = id.split(part2, 1)[0] - except: - pass + id_value = new_path.split(part1, 1)[1] + id_value = id_value.split(part2, 1)[0] + except (IndexError, TypeError): + logger.error("Path split exception: %s // %s, %s", new_path, part1, part2) # example, ideally at this point, something like this: - # template: /path/to/sub-{participant}/etc. - # filepath: /path/to/sub-200/etc. - # label = {participant} - # id = '200' + # template = /path/to/sub-{participant}/etc. + # filepath = /path/to/sub-200/etc. + # label = {participant} + # id_value = '200' if label not in path_dct.keys(): - path_dct[label] = id + path_dct[label] = id_value skip = False - else: - if path_dct[label] != id: - warn = ( - "\n\n[!] WARNING: While parsing your input data " - "files, a file path was found with conflicting " - "IDs for the same data level.\n\n" - "File path: {0}\n" - "Level: {1}\n" - "Conflicting IDs: {2}, {3}\n\n" - "Thus, we can't tell which {4} it belongs to, and " - "whether this file should be included or excluded! " - "Therefore, this file has not been added to the " - "data configuration.".format( - file_path, - label, - path_dct[label], - id, - label.replace("{", "").replace("}", ""), - ) - ) - skip = True - break + elif path_dct[label] != id_value: + logger.warning( + "\n\n[!] WARNING: While parsing your input data files, a file path" + " was found with conflicting IDs for the same data level.\n\nFile" + " path: %s\nLevel: %s\nConflicting IDs: %s, %s\n\nThus, we can't" + " tell which %s it belongs to, and whether this file should be" + " included or excluded! 
Therefore, this file has not been added to" + " the data configuration.", + file_path, + label, + path_dct[label], + id_value, + label.replace("{", "").replace("}", ""), + ) + skip = True + break new_template = new_template.replace(part1, "", 1) new_template = new_template.replace(label, "", 1) new_path = new_path.replace(part1, "", 1) - new_path = new_path.replace(id, "", 1) + new_path = new_path.replace(id_value, "", 1) if skip: return data_dct @@ -1181,13 +1238,12 @@ def update_data_dct( if data_type not in ("anat", "brain_mask"): if "{scan}" in path_dct.keys(): scan_id = path_dct["{scan}"] + elif data_type == "func": + scan_id = "func-1" else: - if data_type == "func": - scan_id = "func-1" - else: - # field map files - keep these open as "None" so that they - # can be applied to all scans, if there isn't one specified - scan_id = None + # field map files - keep these open as "None" so that they + # can be applied to all scans, if there isn't one specified + scan_id = None if inclusion_dct: if "sites" in inclusion_dct.keys(): @@ -1258,42 +1314,31 @@ def update_data_dct( data_dct[site_id][sub_id][ses_id] = temp_sub_dct else: # doubt this ever happens, but just be safe - warn = ( - "\n\n[!] WARNING: Multiple site-participant-session " - "entries found for anatomical scans in your input data " - "directory.\n\nDuplicate sets:\n\n{0}\n\n{1}\n\nOnly " - "adding the first one to the data configuration file." - "\n\n".format(str(data_dct[site_id][sub_id][ses_id]), str(temp_sub_dct)) + logger.warning( + "\n\n[!] WARNING: Multiple site-participant-session entries found for" + " anatomical scans in your input data directory.\n\nDuplicate sets:" + "\n\n%s\n\n%s\n\nOnly adding the first one to the data configuration" + " file.\n\n", + data_dct[site_id][sub_id][ses_id], + temp_sub_dct, ) elif data_type == "freesurfer_dir": if site_id not in data_dct.keys(): - if verbose: - pass - return data_dct + return _no_anatomical_found("freesurfer", "site", site_id, file_path) if sub_id not in data_dct[site_id]: - if verbose: - pass - return data_dct + return _no_anatomical_found("freesurfer", "participant", sub_id, file_path) if ses_id not in data_dct[site_id][sub_id]: - if verbose: - pass - return data_dct + return _no_anatomical_found("freesurfer", "session", ses_id, file_path) data_dct[site_id][sub_id][ses_id]["anat"]["freesurfer_dir"] = file_path elif data_type == "brain_mask": if site_id not in data_dct.keys(): - if verbose: - pass - return data_dct + return _no_anatomical_found("brain mask", "site", site_id, file_path) if sub_id not in data_dct[site_id]: - if verbose: - pass - return data_dct + return _no_anatomical_found("brain mask", "participant", sub_id, file_path) if ses_id not in data_dct[site_id][sub_id]: - if verbose: - pass - return data_dct + return _no_anatomical_found("brain mask", "session", ses_id, file_path) data_dct[site_id][sub_id][ses_id]["brain_mask"] = file_path @@ -1314,17 +1359,11 @@ def update_data_dct( temp_func_dct[scan_id]["scan_parameters"] = scan_params if site_id not in data_dct.keys(): - if verbose: - pass - return data_dct + return _no_anatomical_found("functional", "site", site_id, file_path) if sub_id not in data_dct[site_id]: - if verbose: - pass - return data_dct + return _no_anatomical_found("functional", "participant", sub_id, file_path) if ses_id not in data_dct[site_id][sub_id]: - if verbose: - pass - return data_dct + return _no_anatomical_found("functional", "session", ses_id, file_path) if "func" not in data_dct[site_id][sub_id][ses_id]: 
data_dct[site_id][sub_id][ses_id]["func"] = temp_func_dct @@ -1370,38 +1409,35 @@ def update_data_dct( temp_fmap_dct = {data_type: file_path} if site_id not in data_dct.keys(): - if verbose: - pass - return data_dct + return _no_anatomical_found("field map file", "site", site_id, file_path) if sub_id not in data_dct[site_id]: - if verbose: - pass - return data_dct + return _no_anatomical_found( + "field map file", "participant", sub_id, file_path + ) if ses_id not in data_dct[site_id][sub_id]: if verbose: for temp_ses in data_dct[site_id][sub_id]: if "anat" in data_dct[site_id][sub_id][temp_ses]: - warn = ( - "Field map file found for session {0}, but " - "the anatomical scan chosen for this " - "participant-session is for session {1}, " - "so this field map file is being " - "skipped:\n{2}\n".format(ses_id, temp_ses, file_path) - ) - warn = ( - "{0}\nIf you wish to use the anatomical " - "scan for session {1} for all participants " - "with this session instead, use the 'Which " - "Anatomical Scan?' option in the data " - "configuration builder (or populate the " - "'anatomical_scan' field in the data " - "settings file).\n".format(warn, ses_id) + logger.warning( + "Field map file found for session %s, but the anatomical" + " scan chosen for this participant-session is for session" + " %s, so this field map file is being skipped:\n%s\n\n\nIf" + " you wish to use the anatomical scan for session %s for" + " all participants with this session instead, use the" + " 'Which Anatomical Scan?' option in the data" + " configuration builder (or populate the 'anatomical_scan'" + " field in the data settings file).\n", + ses_id, + temp_ses, + file_path, + ses_id, ) break else: - warn = ( - "No anatomical found for field map file for " - "session {0}:\n{1}\n".format(ses_id, file_path) + logger.warning( + "No anatomical found for field map file for session %s:\n%s\n", + ses_id, + file_path, ) return data_dct @@ -1428,10 +1464,10 @@ def get_nonBIDS_data( inclusion_dct=None, exclusion_dct=None, sites_dct=None, - verbose=False, ): - """Prepare a data dictionary for the data configuration file when given - file path templates describing the input data directories. + """Prepare a data dictionary for the data configuration file... + + ...when given file path templates describing the input data directories. """ import fnmatch import glob @@ -1519,12 +1555,10 @@ def get_nonBIDS_data( err = ( "\n\n[!] No anatomical input file paths found given the data " "settings provided.\n\nAnatomical file template being used: " - "{0}\n".format(anat_glob) + f"{anat_glob}\n" ) if anat_scan: - err = "{0}Anatomical scan identifier provided: {1}" "\n\n".format( - err, anat_scan - ) + err = f"{err}Anatomical scan identifier provided: {anat_scan}\n\n" raise Exception(err) # pull out the site/participant/etc. IDs from each path and connect them @@ -1571,32 +1605,32 @@ def get_nonBIDS_data( ) if possible_anats: err = ( - "{0}There are some file paths found in the directories " + f"{err}There are some file paths found in the directories " "described in the data settings that may be anatomicals " - "that were missed. Here are a few examples:\n".format(err) + "that were missed. 
Here are a few examples:\n" ) for anat in possible_anats[0:5]: - err = "{0}{1}\n".format(err, anat) + err = f"{err}{anat}\n" err = ( - "{0}\nAnd here are some of the possible tags that were " + f"{err}\nAnd here are some of the possible tags that were " "found in the anatomical file paths that were grabbed:" - "\n".format(err) + "\n" ) for tag in tags[0:20]: - err = "{0}{1}\n".format(err, tag) + err = f"{err}{tag}\n" err = ( - "{0}\nCPAC only needs one anatomical scan defined for " + f"{err}\nCPAC only needs one anatomical scan defined for " "each participant-session. If there are multiple " "anatomical scans per participant-session, you can use " "the 'Which Anatomical Scan?' (anatomical_scan) " "parameter to choose which anatomical to " - "select.\n".format(err) + "select.\n" ) err = ( - "{0}\nIf you are already using the 'anatomical_scan' " + f"{err}\nIf you are already using the 'anatomical_scan' " "option in the data settings, check the setting to make " "sure you are properly selecting which anatomical scan " - "to use for your analysis.\n\n".format(err) + "to use for your analysis.\n\n" ) raise Exception(err) @@ -1632,8 +1666,8 @@ def get_nonBIDS_data( if fnmatch.fnmatch(filepath, freesurfer_glob): freesurfer_pool.append(filepath) else: - for dir in os.listdir(str(os.path.dirname(freesurfer_glob))): - freesurfer_pool.append(freesurfer_glob.replace("*", dir)) + for fsdir in os.listdir(str(os.path.dirname(freesurfer_glob))): + freesurfer_pool.append(freesurfer_glob.replace("*", fsdir)) for freesurfer_path in freesurfer_pool: data_dct = update_data_dct( @@ -1822,27 +1856,42 @@ def util_copy_template(template_type=None): ) shutil.copy(settings_template, settings_file) except Exception as exception: - raise Exception( + msg = ( f"\n[!] Could not write the {template_type} file " "template to the current directory.\n" - ) from exception + ) + raise IOError(msg) from exception - if type == "data_settings": - pass - elif type == "pipeline_config": - pass + logger.info( + "\nGenerated a default %s YAML file for editing:\n%s\n\n", + template_type, + settings_file, + ) + if template_type == "data_settings": + logger.info( + "This file can be completed and entered into the C-PAC command-line" + " interface to generate a data configuration file for individual-level" + " analysis by running 'cpac utils data_config build {data settings file}'." + "\n" + ) + elif template_type == "pipeline_config": + logger.info( + "This file can be edited and then used in a C-PAC run by running 'cpac run" + " $BIDS_DIR $OUTPUT_DIR participant --pipeline-file {pipeline config file" + "}'.\n" + ) -def run(data_settings_yml): - """Generate and write out a CPAC data configuration (participant list) - YAML file. 
- """ +def run(data_settings_yml: str): + """Generate and write a CPAC data configuration (participant list) YAML file.""" import os import yaml import CPAC + logger.info("\nGenerating data configuration file..") + settings_dct = yaml.safe_load(open(data_settings_yml, "r")) if ( @@ -1959,12 +2008,12 @@ def run(data_settings_yml): if len(data_dct) > 0: data_config_outfile = os.path.join( settings_dct["outputSubjectListLocation"], - "data_config_{0}.yml" "".format(settings_dct["subjectListName"]), + "data_config_{0}.yml".format(settings_dct["subjectListName"]), ) group_list_outfile = os.path.join( settings_dct["outputSubjectListLocation"], - "group_analysis_participants_{0}.txt" "".format( + "group_analysis_participants_{0}.txt".format( settings_dct["subjectListName"] ), ) @@ -1988,20 +2037,16 @@ def run(data_settings_yml): num_scan += 1 data_list.append(data_dct[site][sub][ses]) - group_list.append("{0}_{1}".format(sub, ses)) + group_list.append(f"{sub}_{ses}") # calculate numbers - len(set(included["site"])) - len(set(included["sub"])) + num_sites = len(set(included["site"])) + num_subs = len(set(included["sub"])) with open(data_config_outfile, "wt") as f: # Make sure YAML doesn't dump aliases (so it's more human # read-able) - f.write( - "# CPAC Data Configuration File\n# Version {0}" "\n".format( - CPAC.__version__ - ) - ) + f.write(f"# CPAC Data Configuration File\n# Version {CPAC.__version__}\n") f.write( "#\n# http://fcp-indi.github.io for more info.\n#\n" "# Tip: This file can be edited manually with " @@ -2016,18 +2061,34 @@ def run(data_settings_yml): with open(group_list_outfile, "wt") as f: # write the inclusion list (mainly the group analysis sublist) # text file - for id in sorted(group_list): - f.write("{0}\n".format(id)) + for group_id in sorted(group_list): + f.write(f"{group_id}\n") if os.path.exists(data_config_outfile): - pass + logger.info( + "\nCPAC DATA SETTINGS file entered (use this preset file to modify" + "/regenerate the data configuration file):\n%s\n\nNumber of:" + "\n...sites: %s\n...participants: %s\n...participant-sessions: %s" + "\n...functional scans: %s\n\nCPAC DATA CONFIGURATION file created" + " (use this for individual-level analysis):\n%s\n", + data_settings_yml, + num_sites, + num_subs, + num_sess, + num_scan, + data_config_outfile, + ) if os.path.exists(group_list_outfile): - pass + logger.info( + "Group-level analysis participant-session list text file created (use" + " this for group-level analysis):\n%s\n", + group_list_outfile, + ) else: err = ( - "\n\n[!] No anatomical input files were found given the data " - "settings provided.\n\n" + "\n\n[!] No anatomical input files were found given the data settings" + " provided.\n\n" ) - raise Exception(err) + raise FileNotFoundError(err) diff --git a/CPAC/utils/configuration/__init__.py b/CPAC/utils/configuration/__init__.py index 5a70b54d71..c7a9f7f829 100644 --- a/CPAC/utils/configuration/__init__.py +++ b/CPAC/utils/configuration/__init__.py @@ -17,10 +17,10 @@ """C-PAC Configuration module.""" from . 
import configuration, diff from .configuration import ( - Configuration, - Preconfiguration, check_pname, + Configuration, preconfig_yaml, + Preconfiguration, set_subject, ) diff --git a/CPAC/utils/configuration/configuration.py b/CPAC/utils/configuration/configuration.py index d74d593302..4bf197c5d8 100644 --- a/CPAC/utils/configuration/configuration.py +++ b/CPAC/utils/configuration/configuration.py @@ -26,7 +26,7 @@ import pkg_resources as p import yaml -from CPAC.utils.typing import TUPLE, ConfigKeyType +from CPAC.utils.typing import ConfigKeyType, TUPLE from .diff import dct_diff SPECIAL_REPLACEMENT_STRINGS = {r"${resolution_for_anat}", r"${func_resolution}"} @@ -303,9 +303,7 @@ def _update_attr(self): def check_path(key): if isinstance(key, str) and "/" in key: if not os.path.exists(key): - warn( - f"Invalid path- {key}. Please check your " "configuration file" - ) + warn(f"Invalid path- {key}. Please check your configuration file") attributes = [ (attr, getattr(self, attr)) @@ -393,7 +391,8 @@ def _check_if_switch(self, key: ConfigKeyType, error: bool = False) -> bool: if _answer: return _answer if error: - raise TypeError(f"`{key}` is not a switch in {self!s}.") + msg = f"`{key}` is not a switch in {self!s}." + raise TypeError(msg) return False def _switch_bool(self, key: ConfigKeyType, value: bool, exclusive: bool) -> bool: @@ -643,10 +642,13 @@ def preconfig_yaml(preconfig_name="default", load=False): from CPAC.pipeline import ALL_PIPELINE_CONFIGS, AVAILABLE_PIPELINE_CONFIGS if preconfig_name not in ALL_PIPELINE_CONFIGS: - raise BadParameter( + msg = ( f"The pre-configured pipeline name '{preconfig_name}' you " "provided is not one of the available pipelines.\n\nAvailable " - f"pipelines:\n{AVAILABLE_PIPELINE_CONFIGS!s}\n", + f"pipelines:\n{AVAILABLE_PIPELINE_CONFIGS!s}\n" + ) + raise BadParameter( + msg, param="preconfig", ) if load: diff --git a/CPAC/utils/configuration/diff.py b/CPAC/utils/configuration/diff.py index c27ad2ff34..c4d8ccb959 100644 --- a/CPAC/utils/configuration/diff.py +++ b/CPAC/utils/configuration/diff.py @@ -53,7 +53,8 @@ def dct_diff(dct1, dct2): _d = _d.dict() except AttributeError: # pylint: disable=raise-missing-from - raise TypeError(f"{_d} is not a dict.") + msg = f"{_d} is not a dict." + raise TypeError(msg) dcts.append(_d) dct1, dct2 = dcts # pylint: disable=unbalanced-tuple-unpacking del dcts diff --git a/CPAC/utils/configuration/yaml_template.py b/CPAC/utils/configuration/yaml_template.py index 0053de8605..e38422264d 100644 --- a/CPAC/utils/configuration/yaml_template.py +++ b/CPAC/utils/configuration/yaml_template.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -20,12 +20,14 @@ from hashlib import sha1 import os import re +from typing import Optional, Union from click import BadParameter import yaml -from CPAC.utils.configuration import Configuration, Preconfiguration, preconfig_yaml -from CPAC.utils.utils import YAML_BOOLS, update_config_dict, update_pipeline_values_1_8 +from CPAC.utils.configuration import Configuration, preconfig_yaml, Preconfiguration +from CPAC.utils.monitoring import UTLOGGER +from CPAC.utils.utils import update_config_dict, update_pipeline_values_1_8, YAML_BOOLS YAML_LOOKUP = {yaml_str: key for key, value in YAML_BOOLS.items() for yaml_str in value} @@ -50,7 +52,8 @@ class YamlTemplate: # pylint: disable=too-few-public-methods """ def __init__(self, original_yaml, base_config=None): - """ + """Initialize a YamlTemplate. 
+ Parameters ---------- original_yaml : str @@ -79,8 +82,7 @@ def __init__(self, original_yaml, base_config=None): get_nested = Configuration.get_nested def dump(self, new_dict, parents=None): - """Dump a YAML file from a new dictionary with the comments from - the template dictionary. + """Dump YAML from a new dictionary with comments from template dictionary. Parameters ---------- @@ -205,7 +207,7 @@ def _parse_comments(self): def _count_indent(line): - """Helper method to determine indentation level. + """Determine indentation level. Parameters ---------- @@ -226,15 +228,15 @@ def _count_indent(line): def create_yaml_from_template( - d, # pylint: disable=invalid-name - template="default", - import_from=None, -): - """Save dictionary to a YAML file, keeping the structure - (such as first level comments and ordering) from the template. + d: Union[Configuration, dict], # pylint: disable=invalid-name + template: str = "default", + import_from: Optional[str] = None, +) -> str: + """Save dictionary to a YAML file, keeping the structure from the template. + + For example, first level comments and ordering. - It may not be fully robust to YAML structures, but it works - for C-PAC config files! + It may not be fully robust to YAML structures, but it works for C-PAC config files! Parameters ---------- @@ -246,6 +248,10 @@ def create_yaml_from_template( import_from : str, optional name of a preconfig. Full config is generated if omitted + Returns + ------- + str + Examples -------- >>> import yaml @@ -294,10 +300,11 @@ def create_yaml_from_template( def _format_list_items( - l, # noqa: E741 # pylint:disable=invalid-name - line_level, -): - """Helper method to handle lists in the YAML. + l: list, # noqa: E741 # pylint:disable=invalid-name + line_level: int, + short_list_length: int = 50, +) -> str: + """Handle lists in the YAML. Parameters ---------- @@ -327,7 +334,7 @@ def _format_list_items( # keep short, simple lists in square brackets if all(isinstance(item, (str, bool, int, float)) for item in l): preformat = str([yaml_bool(item) for item in l]) - if len(preformat) < 50: + if len(preformat) < short_list_length: return preformat.replace("'", "").replace('"', "") # list long or complex lists on lines with indented '-' lead-ins return "\n".join( @@ -342,8 +349,7 @@ def _format_list_items( def hash_data_config(sub_list): - """Function to generate a short SHA1 hash from a data config - subject list of dicts. + """Generate a short SHA1 hash from a data config subject list of dicts. Parameters ---------- @@ -373,7 +379,7 @@ def hash_data_config(sub_list): def indent(line_level, plus=2): - """Function to return an indent string for a given level. + """Return an indent string for a given level. Parameters ---------- @@ -389,7 +395,7 @@ def indent(line_level, plus=2): def yaml_bool(value): - """Helper function to give On/Off value to bools. + """Give On/Off value to bools. Parameters ---------- @@ -419,14 +425,14 @@ def yaml_bool(value): **{k: yaml_bool(value[k]) for k in value if k != "Name"}, } if isinstance(value, bool): - if value is True: + if value: return "On" return "Off" return value def upgrade_pipeline_to_1_8(path): - """Function to upgrade a C-PAC 1.7 pipeline config to C-PAC 1.8. + """Upgrade a C-PAC 1.7 pipeline config to C-PAC 1.8. 
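As a rough illustration of the list handling above (short, simple lists stay inline in square brackets when their preformatted string is under short_list_length characters, while longer lists are written one item per line with "-" lead-ins at the appropriate indent), a simplified approximation rather than the exact YamlTemplate code:

def format_list(items, indent_level=0, short_list_length=50):
    """Approximate the YAML list formatting rule described above."""
    inline = str(items).replace("'", "").replace('"', "")
    if len(inline) < short_list_length:
        return inline
    pad = "  " * (indent_level + 1)
    return "\n".join(f"{pad}- {item}" for item in items)

print(format_list(["On", "Off"]))                    # -> [On, Off]
print(format_list([f"roi_{i}" for i in range(12)]))  # -> one "- roi_N" per line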
Parameters ---------- @@ -447,6 +453,7 @@ def upgrade_pipeline_to_1_8(path): # back up original config now = datetime.isoformat(datetime.now()).replace(":", "_") backup = f"{path}.{now}.bak" + UTLOGGER.info("Backing up %s to %s and upgrading to C-PAC 1.8", path, backup) with open(path, "r", encoding="utf-8") as _f: original = _f.read() with open(backup, "w", encoding="utf-8") as _f: @@ -469,13 +476,15 @@ def upgrade_pipeline_to_1_8(path): def update_a_preconfig(preconfig, import_from): - """ + """Update a preconfig with comments from another config. + Parameters ---------- preconfig : str import_from : str """ + UTLOGGER.info("Updating %s preconfig…", preconfig) updated = create_yaml_from_template( Preconfiguration(preconfig), import_from=import_from ) diff --git a/CPAC/utils/create_flame_model_files.py b/CPAC/utils/create_flame_model_files.py index d77ebaaf5e..3cec99154d 100644 --- a/CPAC/utils/create_flame_model_files.py +++ b/CPAC/utils/create_flame_model_files.py @@ -1,3 +1,22 @@ +# Copyright (C) 2016-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +from CPAC.utils.monitoring import WFLOGGER + + def create_dummy_string(length): ppstring = "" for i in range(0, length): @@ -126,6 +145,8 @@ def create_fts_file(ftest_list, con_names, model_name, current_output, out_dir): import numpy as np + WFLOGGER.info("\nFound f-tests in your model, writing f-tests file (.fts)..\n") + try: out_file = os.path.join(out_dir, model_name + ".fts") @@ -202,7 +223,8 @@ def create_con_ftst_file( evs = evs.rstrip("\r\n").split(",") if evs[0].strip().lower() != "contrasts": - raise Exception + msg = "first cell in contrasts file should contain 'Contrasts'" + raise ValueError(msg) # remove "Contrasts" label and replace it with "Intercept" # evs[0] = "Intercept" @@ -217,7 +239,8 @@ def create_con_ftst_file( try: contrasts_data = np.genfromtxt(con_file, names=True, delimiter=",", dtype=None) except: - raise Exception + msg = f"Could not successfully read in contrast file: {con_file}" + raise OSError(msg) lst = contrasts_data.tolist() # lst = list of rows of the contrast matrix (each row represents a @@ -291,27 +314,31 @@ def create_con_ftst_file( fts_n = fts_columns.T if len(column_names) != (num_EVs_in_con_file): - "\n\n[!] CPAC says: The number of EVs in your model " "design matrix (found in the %s.mat file) does not " "match the number of EVs (columns) in your custom " "contrasts matrix CSV file.\n\nCustom contrasts matrix " "file: %s\n\nNumber of EVs in design matrix: %d\n" "Number of EVs in contrasts file: %d\n\nThe column " "labels in the design matrix should match those in " "your contrasts .CSV file.\nColumn labels in design " "matrix:\n%s" % ( + WFLOGGER.error( + "\n\n[!] 
CPAC says: The number of EVs in your model design matrix (found" + " in the %s.mat file) does not match the number of EVs (columns) in your" + " custom contrasts matrix CSV file.\n\nCustom contrasts matrix file:" + " %s\n\nNumber of EVs in design matrix: %d\nNumber of EVs in contrasts" + " file: %d\n\nThe column labels in the design matrix should match those in" + "your contrasts .CSV file.\nColumn labels in design matrix:\n%s", model_name, con_file, len(column_names), num_EVs_in_con_file, str(column_names), ) - - # raise Exception(err_string) return None, None for design_mat_col, con_csv_col in zip(column_names, evs[1:]): if con_csv_col not in design_mat_col: - errmsg = ( - "\n\n[!] CPAC says: The names of the EVs in your " - "custom contrasts .csv file do not match the names or " - "order of the EVs in the design matrix. Please make " - "sure these are consistent.\nDesign matrix EV columns: " - "%s\nYour contrasts matrix columns: %s\n\n" % (column_names, evs[1:]) + WFLOGGER.error( + "\n\n[!] CPAC says: The names of the EVs in your custom contrasts .csv" + " file do not match the names or order of the EVs in the design" + " matrix. Please make sure these are consistent.\nDesign matrix EV" + " columns: %s\nYour contrasts matrix columns: %s\n\n", + column_names, + evs[1:], ) - return None, None out_file = os.path.join(output_dir, model_name + ".con") @@ -344,6 +371,7 @@ def create_con_ftst_file( ftest_out_file = None if fTest: + WFLOGGER.info("\nFound f-tests in your model, writing f-tests file (.fts)..\n") ftest_out_file = os.path.join(output_dir, model_name + ".fts") with open(ftest_out_file, "wt") as f: diff --git a/CPAC/utils/create_fsl_flame_preset.py b/CPAC/utils/create_fsl_flame_preset.py index 1951a8aa86..0954667839 100644 --- a/CPAC/utils/create_fsl_flame_preset.py +++ b/CPAC/utils/create_fsl_flame_preset.py @@ -1,3 +1,21 @@ +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
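For reference alongside the contrasts-file validation above (the first header cell must read "Contrasts" and the remaining header cells should match the design-matrix EV names), a small sketch of writing and re-reading such a CSV; the EV names "Group_Mean" and "age", the contrast labels, and the file name are invented for illustration:

import csv
import numpy as np

rows = [
    ["Contrasts", "Group_Mean", "age"],  # header: first cell must be "Contrasts"
    ["group_mean_pos", 1, 0],            # one contrast vector per row
    ["age_effect", 0, 1],
]
with open("contrasts.csv", "w", newline="") as f:
    csv.writer(f).writerows(rows)

# read it back the same way the model builder does
data = np.genfromtxt("contrasts.csv", names=True, delimiter=",", dtype=None, encoding=None)
print(data.dtype.names)  # ('Contrasts', 'Group_Mean', 'age')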
+from CPAC.utils.monitoring import FMLOGGER + # TODO: create a function that can help easily map raw pheno files that do not # TODO: have the participant_session id that CPAC uses @@ -44,7 +62,7 @@ def write_group_list_text_file(group_list, out_file=None): new_group_list.append(sub_ses_id) if not out_file: - out_file = os.path.join(os.getcwd(), "group_analysis_participant_" "list.txt") + out_file = os.path.join(os.getcwd(), "group_analysis_participant_list.txt") else: out_file = os.path.abspath(out_file) dir_path = out_file.split(os.path.basename(out_file))[0] @@ -53,10 +71,10 @@ def write_group_list_text_file(group_list, out_file=None): with open(out_file, "wt") as f: for part_id in new_group_list: - f.write("{0}\n".format(part_id)) + f.write(f"{part_id}\n") if os.path.exists(out_file): - pass + FMLOGGER.info("Group-level analysis participant list written:\n%s\n", out_file) return out_file @@ -83,7 +101,7 @@ def write_dataframe_to_csv(matrix_df, out_file=None): matrix_df.to_csv(out_file, index=False) if os.path.exists(out_file): - pass + FMLOGGER.info("CSV file written:\n%s\n", out_file) def write_config_dct_to_yaml(config_dct, out_file=None): @@ -101,7 +119,7 @@ def write_config_dct_to_yaml(config_dct, out_file=None): os.makedirs(dir_path) if not out_file.endswith(".yml"): - out_file = "{0}.yml".format(out_file) + out_file = f"{out_file}.yml" field_order = [ "pipeline_dir", @@ -140,7 +158,7 @@ def write_config_dct_to_yaml(config_dct, out_file=None): with open(out_file, "wt") as f: f.write( - "# CPAC Group-Level Analysis Configuration File\n" "# Version {0}\n".format( + "# CPAC Group-Level Analysis Configuration File\n# Version {0}\n".format( CPAC.__version__ ) ) @@ -156,7 +174,7 @@ def write_config_dct_to_yaml(config_dct, out_file=None): ) for key in field_order: val = config_dct[key] - f.write("{0}: {1}\n\n".format(key, val)) + f.write(f"{key}: {val}\n\n") if key == "FSLDIR": f.write( @@ -170,7 +188,9 @@ def write_config_dct_to_yaml(config_dct, out_file=None): ) if os.path.exists(out_file): - pass + FMLOGGER.info( + "Group-level analysis configuration YAML file written:\n%s\n", out_file + ) def create_design_matrix_df( @@ -235,7 +255,8 @@ def create_design_matrix_df( # file if not pheno_sub_label: # TODO: exception message - raise Exception("there's a pheno file, but no pheno sub label") + msg = "there's a pheno file, but no pheno sub label" + raise Exception(msg) else: # rename the pheno sub label thingy pheno_df = pheno_df.rename(columns={pheno_sub_label: "participant_id"}) @@ -286,12 +307,13 @@ def create_design_matrix_df( map_df["participant_id"] = new_sublist_subs break else: - raise Exception( + msg = ( "the participant IDs in your group " "analysis participant list and the " "participant IDs in your phenotype file " "do not match" ) + raise Exception(msg) # merge if pheno_ses_label: @@ -327,11 +349,12 @@ def create_contrasts_template_df(design_df, contrasts_dct_list=None): if (len(contrast_dct) - 1) != len(contrast_cols): # it's -1 because of the "contrast" column in contrast_dct # TODO: message - raise Exception( + msg = ( "number of columns in the contrast vector " "does not match the number of covariate " "columns in the design matrix" ) + raise Exception(msg) else: # if default, start it up with a blank "template" contrast vector @@ -421,17 +444,17 @@ def preset_single_group_avg( # create design and contrasts matrix file paths design_mat_path = os.path.join( - output_dir, model_name, "design_matrix_{0}.csv".format(model_name) + output_dir, model_name, 
f"design_matrix_{model_name}.csv" ) contrasts_mat_path = os.path.join( - output_dir, model_name, "contrasts_matrix_{0}.csv" "".format(model_name) + output_dir, model_name, f"contrasts_matrix_{model_name}.csv" ) # start group config yaml dictionary design_formula = "Group_Mean" if covariate: - design_formula = "{0} + {1}".format(design_formula, covariate) + design_formula = f"{design_formula} + {covariate}" group_config = { "pheno_file": design_mat_path, @@ -532,28 +555,31 @@ def preset_unpaired_two_group( # two groups encoded in this EV! if len(group_set) > 2: # TODO: message - raise Exception( + msg = ( "more than two groups provided, but this is a" "model for a two-group difference\n\ngroups " - "found in column:\n{0}".format(str(group_set)) + f"found in column:\n{group_set!s}" ) + raise Exception(msg) elif len(group_set) == 0: - raise Exception( + msg = ( "no groups were found - something went wrong " "with reading the phenotype information" ) + raise Exception(msg) elif len(group_set) == 1: - raise Exception( + msg = ( "only one group found in the column provided, " "but this is a model for a two-group difference" "\n\ngroups found in column:\n" - "{0}".format(str(group_set)) + f"{group_set!s}" ) + raise Exception(msg) # create the two new dummy-coded columns # column 1 # new column name - new_name = "{0}_{1}".format(groups[0], new_group_set[0]) + new_name = f"{groups[0]}_{new_group_set[0]}" # create new column encoded in 0's design_df[new_name] = 0 # map the relevant values into 1's @@ -565,7 +591,7 @@ def preset_unpaired_two_group( # column 2 # new column name - new_name = "{0}_{1}".format(groups[0], new_group_set[1]) + new_name = f"{groups[0]}_{new_group_set[1]}" # create new column encoded in 0's design_df[new_name] = 0 # map the relevant values into 1's @@ -582,8 +608,8 @@ def preset_unpaired_two_group( groups = new_groups # start the contrasts - contrast_one = {"Contrasts": "{0} - {1}".format(groups[0], groups[1])} - contrast_two = {"Contrasts": "{0} - {1}".format(groups[1], groups[0])} + contrast_one = {"Contrasts": f"{groups[0]} - {groups[1]}"} + contrast_two = {"Contrasts": f"{groups[1]} - {groups[0]}"} # make these loops in case we expand this to handle additional covariates # past the "prescribed" ones in the model/preset @@ -605,15 +631,15 @@ def preset_unpaired_two_group( # create design and contrasts matrix file paths design_mat_path = os.path.join( - output_dir, model_name, "design_matrix_{0}.csv".format(model_name) + output_dir, model_name, f"design_matrix_{model_name}.csv" ) contrasts_mat_path = os.path.join( - output_dir, model_name, "contrasts_matrix_{0}.csv" "".format(model_name) + output_dir, model_name, f"contrasts_matrix_{model_name}.csv" ) # start group config yaml dictionary - design_formula = "{0} + {1}".format(groups[0], groups[1]) + design_formula = f"{groups[0]} + {groups[1]}" group_config = { "pheno_file": design_mat_path, @@ -727,22 +753,20 @@ def preset_paired_two_group( for val in condition_ev[0 : (len(condition_ev) / 2) - 1]: if past_val: if val != past_val: - raise Exception( - "Non-equal amount of participants for each " "{0}.\n".format( - condition_type - ) + msg = ( + "Non-equal amount of participants for each " f"{condition_type}.\n" ) + raise Exception(msg) past_val = val # second half past_val = None for val in condition_ev[(len(condition_ev) / 2) :]: if past_val: if val != past_val: - raise Exception( - "Non-equal amount of participants for each " "{0}.\n".format( - condition_type - ) + msg = ( + "Non-equal amount of participants for each " 
f"{condition_type}.\n" ) + raise Exception(msg) past_val = val design_df[condition_type] = condition_ev @@ -751,13 +775,13 @@ def preset_paired_two_group( contrast_one = {} contrast_two = {} - design_formula = "{0}".format(condition_type) + design_formula = f"{condition_type}" # create the participant identity columns for sub_ses_id in design_df["participant_id"]: new_part_col = [] sub_id = sub_ses_id.split("_")[0] - new_part_label = "participant_{0}".format(sub_id) + new_part_label = f"participant_{sub_id}" for moving_sub_ses_id in design_df["participant_id"]: moving_sub_id = moving_sub_ses_id.split("_")[0] if moving_sub_id == sub_id: @@ -768,7 +792,7 @@ def preset_paired_two_group( contrast_one.update({new_part_label: 0}) contrast_two.update({new_part_label: 0}) if new_part_label not in design_formula: - design_formula = "{0} + {1}".format(design_formula, new_part_label) + design_formula = f"{design_formula} + {new_part_label}" # finish the contrasts # should be something like @@ -803,11 +827,11 @@ def preset_paired_two_group( # create design and contrasts matrix file paths design_mat_path = os.path.join( - output_dir, model_name, "design_matrix_{0}.csv".format(model_name) + output_dir, model_name, f"design_matrix_{model_name}.csv" ) contrasts_mat_path = os.path.join( - output_dir, model_name, "contrasts_matrix_{0}.csv" "".format(model_name) + output_dir, model_name, f"contrasts_matrix_{model_name}.csv" ) # start group config yaml dictionary @@ -859,7 +883,8 @@ def preset_tripled_two_group( if len(conditions) != 3: # TODO: msg - raise Exception("Three conditions are required for the tripled " "t-test.\n") + msg = "Three conditions are required for the tripled t-test.\n" + raise Exception(msg) sess_conditions = ["session", "Session", "sessions", "Sessions"] scan_conditions = ["scan", "scans", "series", "Series/Scans", "Series"] @@ -963,8 +988,8 @@ def preset_tripled_two_group( raise Exception # label the two covariate columns which encode the three conditions - column_one = "{0}_column_one".format(condition_type) - column_two = "{0}_column_two".format(condition_type) + column_one = f"{condition_type}_column_one" + column_two = f"{condition_type}_column_two" design_df[column_one] = condition_ev_one design_df[column_two] = condition_ev_two @@ -974,12 +999,12 @@ def preset_tripled_two_group( contrast_two = {} contrast_three = {} - design_formula = "{0} + {1}".format(column_one, column_two) + design_formula = f"{column_one} + {column_two}" # create the participant identity columns for sub_id in design_df["participant_id"]: new_part_col = [] - new_part_label = "participant_{0}".format(sub_id) + new_part_label = f"participant_{sub_id}" for moving_sub_ses_id in design_df["participant_id"]: moving_sub_id = moving_sub_ses_id.split("_")[0] if moving_sub_id == sub_id: @@ -991,7 +1016,7 @@ def preset_tripled_two_group( contrast_two.update({new_part_label: 0}) contrast_three.update({new_part_label: 0}) if new_part_label not in design_formula: - design_formula = "{0} + {1}".format(design_formula, new_part_label) + design_formula = f"{design_formula} + {new_part_label}" # finish the contrasts # should be something like @@ -1036,11 +1061,11 @@ def preset_tripled_two_group( # create design and contrasts matrix file paths design_mat_path = os.path.join( - output_dir, model_name, "design_matrix_{0}.csv".format(model_name) + output_dir, model_name, f"design_matrix_{model_name}.csv" ) contrasts_mat_path = os.path.join( - output_dir, model_name, "contrasts_matrix_{0}.csv" "".format(model_name) + output_dir, 
model_name, f"contrasts_matrix_{model_name}.csv" ) # start group config yaml dictionary @@ -1097,7 +1122,7 @@ def run( except Exception as e: err = ( "\n[!] Could not access or read the cpac_outputs.csv " - "resource file:\n{0}\n\nError details {1}\n".format(keys_csv, e) + f"resource file:\n{keys_csv}\n\nError details {e}\n" ) raise Exception(err) @@ -1115,11 +1140,13 @@ def run( if pheno_file and not pheno_sub_label: # TODO: message - raise Exception("pheno file provided, but no pheno sub label") + msg = "pheno file provided, but no pheno sub label" + raise Exception(msg) if pheno_sub_label and not pheno_file: # TODO: message - raise Exception("pheno sub label provided, but no pheno file") + msg = "pheno sub label provided, but no pheno file" + raise Exception(msg) try: if "None" in group_list_text_file or "none" in group_list_text_file: @@ -1137,7 +1164,7 @@ def run( group_list_text_file = os.path.join( output_dir, model_name, - "group_participant_list_" "{0}.txt".format(model_name), + "group_participant_list_" f"{model_name}.txt", ) elif isinstance(group_list_text_file, list): @@ -1148,7 +1175,7 @@ def run( group_list_text_file = os.path.join( output_dir, model_name, - "group_participant_list_" "{0}.txt".format(model_name), + "group_participant_list_" f"{model_name}.txt", ) elif os.path.isfile(group_list_text_file): group_list = read_group_list_text_file(group_list_text_file) @@ -1158,7 +1185,7 @@ def run( group_list_text_file = os.path.join( output_dir, model_name, - "group_participant_list_" "{0}.txt".format(model_name), + "group_participant_list_" f"{model_name}.txt", ) if len(group_list) == 0: @@ -1167,7 +1194,7 @@ def run( "directory you provided. Make sure the directory is the " "individual-level pipeline directory that contains the sub-" "directories labeled with the participant_session IDs.\n\n" - "Pipeline directory provided: {0}\n\n".format(pipeline_dir) + f"Pipeline directory provided: {pipeline_dir}\n\n" ) raise Exception(msg) @@ -1213,11 +1240,13 @@ def run( elif preset == "single_grp_cov": if not pheno_file: # TODO: message - raise Exception("pheno file not provided") + msg = "pheno file not provided" + raise Exception(msg) if not covariate: # TODO: message - raise Exception("covariate not provided") + msg = "covariate not provided" + raise Exception(msg) pheno_df = read_pheno_csv_into_df(pheno_file, pheno_sub_label) @@ -1235,11 +1264,13 @@ def run( elif preset == "unpaired_two": if not pheno_file: # TODO: message - raise Exception("pheno file not provided") + msg = "pheno file not provided" + raise Exception(msg) if not covariate: # TODO: message - raise Exception("the two groups were not provided") + msg = "the two groups were not provided" + raise Exception(msg) # we're assuming covariate will be coming in as a string of either one # covariate name, or a string with two covariates separated by a comma @@ -1270,14 +1301,16 @@ def run( if not covariate: # TODO: message - raise Exception("the two conditions were not provided") + msg = "the two conditions were not provided" + raise Exception(msg) if not condition_type: # TODO: message - raise Exception( + msg = ( "you didn't specify whether the two groups are " "sessions or series/scans" ) + raise Exception(msg) # we're assuming covariate (which in this case, is the two sessions, # or two scans) will be coming in as a string of either one covariate @@ -1309,14 +1342,16 @@ def run( if not covariate: # TODO: message - raise Exception("the three conditions were not provided") + msg = "the three conditions were not provided" + 
raise Exception(msg) if not condition_type: # TODO: message - raise Exception( + msg = ( "you didn't specify whether the three groups are " "sessions or series/scans" ) + raise Exception(msg) # we're assuming covariate (which in this case, is the three sessions, # or three scans) will be coming in as a string of either one @@ -1341,7 +1376,8 @@ def run( else: # TODO: not a real preset! - raise Exception("not one of the valid presets") + msg = "not one of the valid presets" + raise Exception(msg) # write participant list text file write_group_list_text_file(design_df["participant_id"], group_list_text_file) @@ -1353,9 +1389,7 @@ def run( write_dataframe_to_csv(contrasts_df, group_config["custom_contrasts"]) # write group-level analysis config YAML - out_config = os.path.join( - output_dir, model_name, "group_config_{0}.yml".format(model_name) - ) + out_config = os.path.join(output_dir, model_name, f"group_config_{model_name}.yml") write_config_dct_to_yaml(group_config, out_config) if run: diff --git a/CPAC/utils/create_fsl_model.py b/CPAC/utils/create_fsl_model.py index 35316e4ac7..7cf69659c8 100644 --- a/CPAC/utils/create_fsl_model.py +++ b/CPAC/utils/create_fsl_model.py @@ -1,3 +1,22 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +from CPAC.utils.monitoring import IFLOGGER + + def load_pheno_file(pheno_file): import os @@ -362,7 +381,13 @@ def model_group_var_separately( grouping_var, formula, pheno_data_dict, ev_selections, coding_scheme ): if grouping_var is None or grouping_var not in formula: - raise Exception + msg = ( + "\n\n[!] CPAC says: Model group variances separately is enabled, but the" + " grouping variable set is either set to None, or was not included in the" + f" model as one of the EVs.\n\nDesign formula: {formula}\nGrouping" + f" variable: {grouping_var}\n\n" + ) + raise ValueError(msg) # do this a little early for the grouping variable so that it doesn't # get in the way of doing this for the other EVs once they have the @@ -471,18 +496,33 @@ def model_group_var_separately( def check_multicollinearity(matrix): import numpy as np + IFLOGGER.info("\nChecking for multicollinearity in the model..") + U, s, V = np.linalg.svd(matrix) max_singular = np.max(s) min_singular = np.min(s) + IFLOGGER.info( + "Max singular: %s\nMin singular: %s\nRank: %s\n\n", + max_singular, + min_singular, + np.linalg.matrix_rank(matrix), + ) + + _warning = ( + "[!] CPAC warns: Detected multicollinearity in the computed group-level" + " analysis model. 
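# Illustrative sketch (not taken from the diff) of the refactor applied throughout
# the preset-handling hunks above: the error message is built with an f-string and
# bound to a name before raising, instead of calling .format() inline in the raise.
# `check_model_name` and its message text are placeholders for the pattern only.
def check_model_name(model_name):
    """Raise if no group-model name was provided."""
    if not model_name:
        # previously: raise Exception("no model name provided: {0}".format(model_name))
        msg = f"no model name provided: {model_name!r}"
        raise ValueError(msg)

check_model_name("my_group_model")  # passes silently for a non-empty name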
Please double-check your model design.\n\n" + ) + if min_singular == 0: - pass + IFLOGGER.warning(_warning) else: condition_number = float(max_singular) / float(min_singular) + IFLOGGER.info("Condition number: %f\n\n", condition_number) if condition_number > 30: - pass + IFLOGGER.warning(_warning) def write_mat_file( @@ -805,8 +845,17 @@ def create_design_matrix( try: dmatrix = patsy.dmatrix(formula, pheno_data_dict, NA_action="raise") - except: - raise Exception + except Exception as e: + msg = ( + "\n\n[!] CPAC says: Design matrix creation wasn't successful - do the" + " terms in your formula correctly correspond to the EVs listed in your" + " phenotype file?\nPhenotype file provided: %s\n\nPhenotypic data" + " columns (regressors): %s\nFormula: %s\n\n", + pheno_file, + list(pheno_data_dict.keys()), + formula, + ) + raise RuntimeError(msg) from e # check the model for multicollinearity - Patsy takes care of this, but # just in case @@ -892,7 +941,7 @@ def positive(dmat, a, coding, group_sep, grouping_var): evs = dmat.design_info.column_name_indexes con = np.zeros(dmat.shape[1]) - if group_sep is True: + if group_sep: if "__" in a and grouping_var in a: ev_desc = a.split("__") @@ -976,6 +1025,8 @@ def create_dummy_string(length): def create_con_file(con_dict, col_names, file_name, current_output, out_dir): import os + IFLOGGER.info("col names: %s", col_names) + with open(os.path.join(out_dir, file_name) + ".con", "w+") as f: # write header num = 1 @@ -1012,6 +1063,7 @@ def create_fts_file(ftest_list, con_dict, model_name, current_output, out_dir): import numpy as np try: + IFLOGGER.info("\nFound f-tests in your model, writing f-tests file (.fts)..\n") with open(os.path.join(out_dir, model_name + ".fts"), "w") as f: print("/NumWaves\t", len(con_dict), file=f) print("/NumContrasts\t", len(ftest_list), file=f) @@ -1089,6 +1141,7 @@ def create_con_ftst_file( # evs[0] = "Intercept" fTest = False + IFLOGGER.info("evs: %s", evs) for ev in evs: if "f_test" in ev: count_ftests += 1 @@ -1099,8 +1152,9 @@ def create_con_ftst_file( try: data = np.genfromtxt(con_file, names=True, delimiter=",", dtype=None) - except: - raise Exception + except Exception as e: + msg = f"Error: Could not successfully read in contrast file: {con_file}" + raise OSError(msg) from e lst = data.tolist() @@ -1218,6 +1272,7 @@ def create_con_ftst_file( np.savetxt(f, contrasts, fmt="%1.5e", delimiter="\t") if fTest: + IFLOGGER.info("\nFound f-tests in your model, writing f-tests file (.fts)..\n") ftest_out_dir = os.path.join(output_dir, model_name + ".fts") with open(ftest_out_dir, "wt") as f: @@ -1263,7 +1318,7 @@ def process_contrast( # are being modeled separately, and we don't want the EV # that is the grouping variable (which is now present in # other EV names) to confound this operation - if group_sep is True: + if group_sep: gpvar = grouping_var else: gpvar = "..." @@ -1361,8 +1416,13 @@ def run( try: if not os.path.exists(output_dir): os.makedirs(output_dir) - except: - raise Exception + except Exception as e: + msg = ( + "\n\n[!] 
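# Standalone sketch of the condition-number test used in check_multicollinearity()
# above: compare the largest and smallest singular values of the design matrix and
# warn when the ratio exceeds 30. The example matrix is made up; only numpy is assumed.
import numpy as np

def condition_number(matrix):
    """Return max(singular values) / min(singular values) of a design matrix."""
    singular_values = np.linalg.svd(matrix, compute_uv=False)
    smallest = singular_values.min()
    if smallest == 0:
        return float("inf")
    return float(singular_values.max() / smallest)

design = np.array([[1.0, 2.0], [2.0, 4.001], [3.0, 6.0]])  # nearly collinear columns
if condition_number(design) > 30.0:
    print("possible multicollinearity in the group-level design matrix")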
CPAC says: Could not successfully create the group analysis" + f" output directory:\n{output_dir}\n\nMake sure you have write access" + " in this file structure.\n\n\n" + ) + raise OSError(msg) from e measure_dict = {} @@ -1487,7 +1547,7 @@ def run( if len(contrast_items) > 2: idx = 0 - for item in contrast_items: + for _item in contrast_items: # they need to be put back into Patsy formatted header # titles because the dmatrix gets passed into the function # that writes out the contrast matrix @@ -1551,6 +1611,10 @@ def run( or (custom_contrasts == "") or ("None" in custom_contrasts) ): + IFLOGGER.info( + "Writing contrasts file (.con) based on contrasts provided using the group" + " analysis model builder's contrasts editor.." + ) create_con_file( contrasts_dict, regressor_names, model_name, current_output, model_out_dir ) @@ -1561,6 +1625,11 @@ def run( ) else: + IFLOGGER.info( + "\nWriting contrasts file (.con) based on contrasts provided with a custom" + " contrasts matrix CSV file..\n" + ) + create_con_ftst_file( custom_contrasts, model_name, diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py index a3f52e64a9..ab8ef4bf26 100644 --- a/CPAC/utils/datasource.py +++ b/CPAC/utils/datasource.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2023 C-PAC Developers +# Copyright (C) 2012-2024 C-PAC Developers # This file is part of C-PAC. @@ -14,13 +14,14 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . +"""Utilities for sourcing data.""" import csv import json from pathlib import Path import re from typing import Union -from nipype import logging +from voluptuous import RequiredFieldInvalid from nipype.interfaces import utility as util from CPAC.pipeline import nipype_pipeline_engine as pe @@ -28,15 +29,13 @@ from CPAC.utils import function from CPAC.utils.bids_utils import bids_remove_entity from CPAC.utils.interfaces.function import Function +from CPAC.utils.monitoring import FMLOGGER from CPAC.utils.typing import TUPLE from CPAC.utils.utils import get_scan_params -logger = logging.getLogger("nipype.workflow") - def bidsier_prefix(unique_id): - """ - Function to return a BIDSier prefix for a given unique_id. + """Return a BIDSier prefix for a given unique_id. Parameters ---------- @@ -67,8 +66,7 @@ def bidsier_prefix(unique_id): def get_rest(scan, rest_dict, resource="scan"): - """Return the file path of the chosen resource stored in the functional - file dictionary, if it exists. + """Return the path of the chosen resource in the functional file dictionary. 
scan: the scan/series name or label rest_dict: the dictionary read in from the data configuration YAML file @@ -88,23 +86,20 @@ def get_rest(scan, rest_dict, resource="scan"): def extract_scan_params_dct(scan_params_dct): + """Extract the scan parameters dictionary from the data configuration file.""" return scan_params_dct -def get_map(map, map_dct): - # return the spatial map required - return map_dct[map] - - def select_model_files(model, ftest, model_name): - """Method to select model files.""" + """Select model files.""" import glob import os files = glob.glob(os.path.join(model, "*")) if len(files) == 0: - raise Exception("No files found inside directory %s" % model) + msg = f"No files found inside directory {model}" + raise FileNotFoundError(msg) fts_file = "" @@ -118,25 +113,25 @@ def select_model_files(model, ftest, model_name): elif (model_name + ".con") in filename: con_file = filename - if ftest is True and fts_file == "": + if ftest and fts_file == "": errmsg = ( "\n[!] CPAC says: You have f-tests included in your group " - "analysis model '%s', but no .fts files were found in the " - "output folder specified for group analysis: %s.\n\nThe " + f"analysis model '{model_name}', but no .fts files were found in the " + f"output folder specified for group analysis: {model}.\n\nThe " ".fts file is automatically generated by CPAC, and if you " "are seeing this error, it is because something went wrong " "with the generation of this file, or it has been moved." - "\n\n" % (model_name, model) ) - raise Exception(errmsg) + raise FileNotFoundError(errmsg) return fts_file, con_file, grp_file, mat_file def check_func_scan(func_scan_dct, scan): - """Run some checks on the functional timeseries-related files for a given - series/scan name or label. + """Run some checks on the functional timeseries-related files. + + For a given series/scan name or label. """ scan_resources = func_scan_dct[scan] @@ -152,30 +147,32 @@ def check_func_scan(func_scan_dct, scan): " scan parameters: /path/to/scan_params.json\n\n" "See the User Guide for more information.\n\n" ) - raise Exception(err) + raise ValueError(err) # actual 4D time series file if "scan" not in scan_resources.keys(): err = ( - "\n\n[!] The {0} scan is missing its actual time-series " + f"\n\n[!] The {scan} scan is missing its actual time-series " "scan file, which should be a filepath labeled with the " - "'scan' key.\n\n".format(scan) + "'scan' key.\n\n" ) - raise Exception(err) + raise FileNotFoundError(err) # Nipype restriction (may have changed) if "." in scan or "+" in scan or "*" in scan: - raise Exception( + msg = ( "\n\n[!] Scan names cannot contain any special " "characters (., +, *, etc.). Please update this " - "and try again.\n\nScan: {0}" - "\n\n".format(scan) + f"and try again.\n\nScan: {scan}" + "\n\n" ) + raise ValueError(msg) def create_func_datasource(rest_dict, rpool, wf_name="func_datasource"): - """Return the functional timeseries-related file paths for each - series/scan, from the dictionary of functional files described in the data + """Return the functional timeseries-related file paths for each series/scan... + + ...from the dictionary of functional files described in the data configuration (sublist) YAML file. Scan input (from inputnode) is an iterable. 
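# Small sketch of the scan-name restriction enforced in check_func_scan() above:
# scan/series labels feed into Nipype node names, so certain special characters
# are rejected. The helper name and example labels are hypothetical.
FORBIDDEN_SCAN_CHARS = (".", "+", "*")

def validate_scan_name(scan):
    """Raise if a scan/series label contains characters Nipype cannot handle."""
    bad = [char for char in FORBIDDEN_SCAN_CHARS if char in scan]
    if bad:
        msg = f"Scan names cannot contain special characters {bad}: {scan}"
        raise ValueError(msg)

validate_scan_name("rest_run-1")    # fine
# validate_scan_name("rest.run1")   # would raise ValueError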
@@ -286,8 +283,10 @@ def create_func_datasource(rest_dict, rpool, wf_name="func_datasource"): def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): - """Return the field map files, from the dictionary of functional files - described in the data configuration (sublist) YAML file. + """Return the field map files... + + ...from the dictionary of functional files described in the data configuration + (sublist) YAML file. """ import nipype.interfaces.utility as util @@ -377,6 +376,7 @@ def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): def get_fmap_phasediff_metadata(data_config_scan_params): + """Return the scan parameters for a field map phasediff scan.""" if ( not isinstance(data_config_scan_params, dict) and ".json" in data_config_scan_params @@ -429,15 +429,19 @@ def calc_delta_te_and_asym_ratio( ees_asym_ratio : float """ if not isinstance(effective_echo_spacing, float): - raise LookupError( + msg = ( "C-PAC could not find `EffectiveEchoSpacing` in " "either fmap or func sidecar JSON, but that field " "is required for PhaseDiff distortion correction." ) + raise LookupError(msg) # convert into milliseconds if necessary # these values will/should never be more than 10ms - if ((echo_times[0] * 1000) < 10) and ((echo_times[1] * 1000) < 10): + if ( + ((echo_times[0] * 1000) < 10) # noqa: PLR2004 + and ((echo_times[1] * 1000) < 10) # noqa: PLR2004 + ): echo_times[0] = echo_times[0] * 1000 echo_times[1] = echo_times[1] * 1000 @@ -447,15 +451,16 @@ def calc_delta_te_and_asym_ratio( def gather_echo_times(echotime_1, echotime_2, echotime_3=None, echotime_4=None): + """Gather the echo times from the field map data.""" echotime_list = [echotime_1, echotime_2, echotime_3, echotime_4] echotime_list = list(filter(lambda item: item is not None, echotime_list)) echotime_list = list(set(echotime_list)) - if len(echotime_list) != 2: - raise Exception( - "\n[!] Something went wrong with the field map echo " - "times - there should be two distinct values.\n\n" - f"Echo Times:\n{echotime_list}\n" + if len(echotime_list) != 2: # noqa: PLR2004 + msg = ( + "\n[!] Something went wrong with the field map echo times - there should" + f" be two distinct values.\n\nEcho Times:\n{echotime_list}\n" ) + raise ValueError(msg) return echotime_list @@ -466,7 +471,9 @@ def match_epi_fmaps( epi_fmap_two=None, epi_fmap_params_two=None, ): - """Parse the field map files in the data configuration and determine which + """Match EPI field maps to the BOLD scan. + + Parse the field map files in the data configuration and determine which ones have the same and opposite phase-encoding directions as the BOLD scan in the current pipeline. 
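# Sketch of the echo-time handling shown in gather_echo_times() and
# calc_delta_te_and_asym_ratio() above: drop missing values, keep the distinct
# ones, require exactly two for PhaseDiff distortion correction, and convert
# seconds to milliseconds when both values are well under 10 ms. Example values
# are made up; this is not the C-PAC implementation itself.
def two_echo_times(*echo_times):
    """Return exactly two distinct echo times in milliseconds, or raise."""
    distinct = sorted({t for t in echo_times if t is not None})
    if len(distinct) != 2:
        msg = f"Expected two distinct echo times, got: {distinct}"
        raise ValueError(msg)
    # if both values look like seconds (i.e. under 10 once scaled), convert to ms
    if all(t * 1000 < 10 for t in distinct):
        distinct = [t * 1000 for t in distinct]
    return distinct

print(two_echo_times(0.00492, 0.00738, None))  # approximately [4.92, 7.38]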
@@ -518,6 +525,7 @@ def ingress_func_metadata( unique_id=None, num_strat=None, ): + """Ingress metadata for functional scans.""" name_suffix = "" for suffix_part in (unique_id, num_strat): if suffix_part is not None: @@ -529,18 +537,18 @@ def ingress_func_metadata( fmap_TE_list = [] if "fmap" in sub_dict: second = False - for key in sub_dict["fmap"]: + for orig_key in sub_dict["fmap"]: gather_fmap = create_fmap_datasource( - sub_dict["fmap"], f"fmap_gather_{key}_{subject_id}" + sub_dict["fmap"], f"fmap_gather_{orig_key}_{subject_id}" ) gather_fmap.inputs.inputnode.set( subject=subject_id, creds_path=input_creds_path, dl_dir=cfg.pipeline_setup["working_directory"]["path"], ) - gather_fmap.inputs.inputnode.scan = key + gather_fmap.inputs.inputnode.scan = orig_key - orig_key = key + key = orig_key if "epi" in key and not second: key = "epi-1" second = True @@ -801,6 +809,7 @@ def ingress_func_metadata( def create_general_datasource(wf_name): + """Create a general-purpose datasource node.""" import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe @@ -844,6 +853,7 @@ def create_general_datasource(wf_name): def create_check_for_s3_node( name, file_path, img_type="other", creds_path=None, dl_dir=None, map_node=False ): + """Create a node to check if a file is on S3.""" if map_node: check_s3_node = pe.MapNode( function.Function( @@ -853,7 +863,7 @@ def create_check_for_s3_node( as_module=True, ), iterfield=["file_path"], - name="check_for_s3_%s" % name, + name=f"check_for_s3_{name}", ) else: check_s3_node = pe.Node( @@ -863,7 +873,7 @@ def create_check_for_s3_node( function=check_for_s3, as_module=True, ), - name="check_for_s3_%s" % name, + name=f"check_for_s3_{name}", ) check_s3_node.inputs.set( @@ -873,10 +883,10 @@ def create_check_for_s3_node( return check_s3_node -# Check if passed-in file is on S3 def check_for_s3( file_path, creds_path=None, dl_dir=None, img_type="other", verbose=False ): + """Check if passed-in file is on S3.""" # Import packages import os @@ -919,41 +929,45 @@ def check_for_s3( os.makedirs(local_dir, exist_ok=True) if os.path.exists(local_path): - pass + FMLOGGER.info("%s already exists- skipping download.", local_path) else: # Download file try: bucket = fetch_creds.return_bucket(creds_path, bucket_name) + FMLOGGER.info("Attempting to download from AWS S3: %s", file_path) bucket.download_file(Key=s3_key, Filename=local_path) except botocore.exceptions.ClientError as exc: error_code = int(exc.response["Error"]["Code"]) err_msg = str(exc) - if error_code == 403: + if error_code == 403: # noqa: PLR2004 err_msg = ( - 'Access to bucket: "%s" is denied; using credentials ' - 'in subject list: "%s"; cannot access the file "%s"' - % (bucket_name, creds_path, file_path) + f'Access to bucket: "{bucket_name}" is denied; using' + f' credentials in subject list: "{creds_path}"; cannot access' + f' the file "{file_path}"' ) - elif error_code == 404: + error_type = PermissionError + elif error_code == 404: # noqa: PLR2004 err_msg = ( - "File: {0} does not exist; check spelling and try " - "again".format(os.path.join(bucket_name, s3_key)) + f"File: {os.path.join(bucket_name, s3_key)} does not exist;" + " check spelling and try again" ) + error_type = FileNotFoundError else: err_msg = ( - 'Unable to connect to bucket: "%s". Error message:\n%s' - % (bucket_name, exc) + f'Unable to connect to bucket: "{bucket_name}". 
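# Sketch of the error mapping introduced in check_for_s3() above: translate
# botocore ClientError codes into more specific built-in exceptions instead of a
# bare Exception. Requires boto3/botocore; the helper name, bucket, and key are
# placeholders, not C-PAC API.
import botocore.exceptions

_S3_ERROR_TYPES = {403: PermissionError, 404: FileNotFoundError}

def download_or_raise(bucket, s3_key, local_path):
    """Download a key from an S3 bucket, raising a typed error on failure."""
    try:
        bucket.download_file(Key=s3_key, Filename=local_path)
    except botocore.exceptions.ClientError as exc:
        code = int(exc.response["Error"]["Code"])
        error_type = _S3_ERROR_TYPES.get(code, ConnectionError)
        msg = f"Could not fetch s3://{bucket.name}/{s3_key}: {exc}"
        raise error_type(msg) from exc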
Error message:' + f"\n{exc}" ) + error_type = ConnectionError - raise Exception(err_msg) + raise error_type(err_msg) except Exception as exc: - err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s' % ( - bucket_name, - exc, + err_msg = ( + f'Unable to connect to bucket: "{bucket_name}". Error message:' + f"\n{exc}" ) - raise Exception(err_msg) + raise ConnectionError(err_msg) # Otherwise just return what was passed in, resolving if a link else: @@ -979,51 +993,48 @@ def check_for_s3( ] ) if local_path in ndmg_atlases["v0"]: - raise FileNotFoundError( - "".join( - [ - "Neuroparc atlas paths were updated on July 20, 2020. " - "C-PAC configuration files using Neuroparc v0 atlas paths " - "(including C-PAC default and preconfigured pipeline " - "configurations from v1.6.2a and earlier) need to be " - "updated to use Neuroparc atlases. Your current " - "configuration includes the Neuroparc v0 path " - f"{local_path} which needs to be updated to ", - ndmg_atlases["v1"][ndmg_atlases["v0"].index(local_path)], - ". For a full list such paths, see https://fcp-indi." - "github.io/docs/nightly/user/ndmg_atlases", - ] - ) + from CPAC.utils.docs import DOCS_URL_PREFIX + + msg = ( + "Neuroparc atlas paths were updated on July 20, 2020. C-PAC" + " configuration files using Neuroparc v0 atlas paths (including C-PAC" + " default and preconfigured pipeline configurations from v1.6.2a and" + " earlier) need to be updated to use Neuroparc atlases. Your current" + f" configuration includes the Neuroparc v0 path {local_path} which" + " needs to be updated to" + f" {ndmg_atlases['v1'][ndmg_atlases['v0'].index(local_path)]}. For a" + f" full list such paths, see {DOCS_URL_PREFIX}/user/ndmg_atlases" ) else: - raise FileNotFoundError(f"File {local_path} does not exist!") + msg = f"File {local_path} does not exist!" + raise FileNotFoundError(msg) if verbose: - pass + FMLOGGER.info("Downloaded file:\n%s\n", local_path) # Check image dimensionality if local_path.endswith(".nii") or local_path.endswith(".nii.gz"): img_nii = nib.load(local_path) if img_type == "anat": - if len(img_nii.shape) != 3: - raise IOError( - "File: %s must be an anatomical image with 3 " - "dimensions but %d dimensions found!" - % (local_path, len(img_nii.shape)) + if len(img_nii.shape) != 3: # noqa: PLR2004 + msg = ( + f"File: {local_path} must be an anatomical image with 3 " + f"dimensions but {len(img_nii.shape)} dimensions found!" ) elif img_type == "func": if len(img_nii.shape) not in [3, 4]: - raise IOError( - "File: %s must be a functional image with 3 or " - "4 dimensions but %d dimensions found!" - % (local_path, len(img_nii.shape)) + msg = ( + f"File: {local_path} must be a functional image with 3 or " + f"4 dimensions but {len(img_nii.shape)} dimensions found!" 
) + raise IOError(msg) return local_path def gather_extraction_maps(c): + """Gather the timeseries and SCA analysis configurations.""" ts_analysis_dict = {} sca_analysis_dict = {} @@ -1048,7 +1059,7 @@ def gather_extraction_maps(c): "set to run, but no ROI NIFTI file paths were " "provided!\n\n" ) - raise Exception(err) + raise RequiredFieldInvalid(err) if c.seed_based_correlation_analysis["run"]: try: @@ -1059,13 +1070,13 @@ def gather_extraction_maps(c): "is set to run, but no ROI NIFTI file paths were " "provided!\n\n" ) - raise Exception(err) + raise RequiredFieldInvalid(err) # flip the dictionary for roi_path in sca_roi_dict.keys(): # update analysis dict - for analysis_type in sca_roi_dict[roi_path].split(","): - analysis_type = analysis_type.replace(" ", "") + for _analysis_type in sca_roi_dict[roi_path].split(","): + analysis_type = _analysis_type.replace(" ", "") if analysis_type not in sca_analysis_dict.keys(): sca_analysis_dict[analysis_type] = [] @@ -1076,7 +1087,9 @@ def gather_extraction_maps(c): def get_highest_local_res(template: Union[Path, str], tagname: str) -> Path: - """Given a reference template path and a resolution string, get all + """Return the highest resolution of a template in the same local path. + + Given a reference template path and a resolution string, get all resolutions of that template in the same local path and return the highest resolution. @@ -1124,12 +1137,12 @@ def get_highest_local_res(template: Union[Path, str], tagname: str) -> Path: try: return matching_templates[0] except (FileNotFoundError, IndexError): - raise LookupError(f"Could not find template {template}") + msg = f"Could not find template {template}" + raise LookupError(msg) def res_string_to_tuple(resolution): - """ - Converts a resolution string to a tuple of floats. + """Convert a resolution string to a tuple of floats. 
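# Sketch of the dimensionality check shown above: anatomical images must be 3D,
# functional images 3D or 4D. Only nibabel is assumed; the helper name and paths
# are placeholders.
import nibabel as nib

EXPECTED_DIMS = {"anat": (3,), "func": (3, 4)}

def check_image_dims(local_path, img_type):
    """Raise if a NIfTI image has an unexpected number of dimensions."""
    if img_type in EXPECTED_DIMS and local_path.endswith((".nii", ".nii.gz")):
        ndim = len(nib.load(local_path).shape)
        if ndim not in EXPECTED_DIMS[img_type]:
            msg = (
                f"File: {local_path} expected {EXPECTED_DIMS[img_type]} dimensions"
                f" but {ndim} dimensions found!"
            )
            raise IOError(msg)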
Parameters ---------- @@ -1147,6 +1160,7 @@ def res_string_to_tuple(resolution): def resolve_resolution(resolution, template, template_name, tag=None): + """Resample a template to a given resolution.""" from nipype.interfaces import afni from CPAC.pipeline import nipype_pipeline_engine as pe @@ -1200,6 +1214,7 @@ def resolve_resolution(resolution, template, template_name, tag=None): def create_anat_datasource(wf_name="anat_datasource"): + """Create a dataflow for anatomical images.""" import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe @@ -1241,12 +1256,13 @@ def create_anat_datasource(wf_name="anat_datasource"): def create_roi_mask_dataflow(masks, wf_name="datasource_roi_mask"): + """Create a dataflow for ROI masks.""" import os mask_dict = {} - for mask_file in masks: - mask_file = mask_file.rstrip("\r\n") + for _mask_file in masks: + mask_file = _mask_file.rstrip("\r\n") if mask_file.strip() == "" or mask_file.startswith("#"): continue @@ -1270,11 +1286,12 @@ def create_roi_mask_dataflow(masks, wf_name="datasource_roi_mask"): except IndexError: # pylint: disable=raise-missing-from - raise ValueError( + msg = ( "Error in spatial_map_dataflow: File " f'extension of {base_file} not ".nii" or ' ".nii.gz" ) + raise ValueError(msg) except Exception as e: raise e @@ -1282,10 +1299,11 @@ def create_roi_mask_dataflow(masks, wf_name="datasource_roi_mask"): base_name = format_identifier(name, desc) if base_name in mask_dict: - raise ValueError( + msg = ( "Duplicate templates/atlases not allowed: " f"{mask_file} {mask_dict[base_name]}" ) + raise ValueError(msg) mask_dict[base_name] = mask_file @@ -1332,14 +1350,15 @@ def create_roi_mask_dataflow(masks, wf_name="datasource_roi_mask"): def create_spatial_map_dataflow(spatial_maps, wf_name="datasource_maps"): + """Create a dataflow for spatial maps.""" import os wf = pe.Workflow(name=wf_name) spatial_map_dict = {} - for spatial_map_file in spatial_maps: - spatial_map_file = spatial_map_file.rstrip("\r\n") + for _spatial_map_file in spatial_maps: + spatial_map_file = _spatial_map_file.rstrip("\r\n") base_file = os.path.basename(spatial_map_file) try: @@ -1352,18 +1371,19 @@ def create_spatial_map_dataflow(spatial_maps, wf_name="datasource_maps"): ) if base_name in spatial_map_dict: - raise ValueError( - "Files with same name not allowed: %s %s" - % (spatial_map_file, spatial_map_dict[base_name]) + msg = ( + f"Files with same name not allowed: {spatial_map_file}" + f" {spatial_map_dict[base_name]}" ) + raise ValueError(msg) spatial_map_dict[base_name] = spatial_map_file except IndexError: - raise Exception( - "Error in spatial_map_dataflow: " - "File extension not in .nii and .nii.gz" + msg = ( + "Error in spatial_map_dataflow: File extension not in .nii and .nii.gz" ) + raise ValueError(msg) inputnode = pe.Node( util.IdentityInterface( @@ -1408,6 +1428,7 @@ def create_spatial_map_dataflow(spatial_maps, wf_name="datasource_maps"): def create_grp_analysis_dataflow(wf_name="gp_dataflow"): + """Create a dataflow for group analysis.""" import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe @@ -1452,6 +1473,7 @@ def create_grp_analysis_dataflow(wf_name="gp_dataflow"): def resample_func_roi(in_func, in_roi, realignment, identity_matrix): + """Resample functional image to ROI or ROI to functional image using flirt.""" import os import nibabel as nib diff --git a/CPAC/utils/docs.py b/CPAC/utils/docs.py index 859fd0c727..796d1351b6 100644 --- a/CPAC/utils/docs.py +++ 
b/CPAC/utils/docs.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -14,16 +14,68 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -"""Utilties for documentation.""" +"""Utilties for C-PAC documentation.""" +from functools import wraps +from typing import Callable, Optional from urllib import request from urllib.error import ContentTooShortError, HTTPError, URLError +from warnings import warn from CPAC import __version__ from CPAC.utils import versioning -def docstring_parameter(*args, **kwargs): - """Decorator to parameterize docstrings. +def deprecated( + version: Optional[str] = None, explanation: Optional[str] = None +) -> Callable: + """Mark a function as deprecated. + + Parameters + ---------- + version : str, optional + The version in which the function was deprecated. + + explanation : str, optional + An explanation of why the function was deprecated. + + Returns + ------- + Callable + The decorated function. + """ + + def decorator(func: Callable) -> Callable: + if func.__doc__ is None: + func.__doc__ = "" + + note = ".. deprecated::" + if version: + note += f" {version}" + if explanation: + note += f"\n {explanation}\n" + func.__doc__ = note + "\n" + func.__doc__ + + @wraps(func) + def new_func(*args, **kwargs) -> Callable: + """Warn that the function is deprecated.""" + _warning = f"Call to deprecated function '{func.__qualname__}'." + if explanation: + _warning += f" {explanation}\n" + warn( + _warning, + category=DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return new_func + + return decorator + + +def docstring_parameter(*args, **kwargs) -> Callable: + """Parameterize docstrings. + Use double-curly-braces ({{}}) for literal curly braces. Examples @@ -42,7 +94,7 @@ def docstring_parameter(*args, **kwargs): How about { this }? """ - def dec(obj): + def dec(obj: Callable) -> Callable: if obj.__doc__ is None: obj.__doc__ = "" obj.__doc__ = obj.__doc__.format(*args, **kwargs) @@ -51,10 +103,10 @@ def dec(obj): return dec -def _docs_url_prefix(): - """Function to determine the URL prefix for this version of C-PAC.""" +def _docs_url_prefix() -> str: + """Determine the URL prefix for this version of C-PAC.""" - def _url(url_version): + def _url(url_version: str) -> str: return f"https://fcp-indi.github.io/docs/{url_version}" url_version = f"v{__version__}" @@ -71,7 +123,7 @@ def _url(url_version): def version_report() -> str: - """A formatted block of versions included in CPAC's environment.""" + """Return a formatted block of versions included in CPAC's environment.""" version_list = [] for pkg, version in versioning.REPORTED.items(): version_list.append(f"{pkg}: {version}") @@ -83,4 +135,36 @@ def version_report() -> str: return "\n".join(version_list) +def outdent_lines(docstring: str, spaces: int = 4) -> str: + """Outdent lines in a string by specified number of spaces. + + Only outdents lines that are at least that indented. + Useful for combining docstrings. + + Examples + -------- + >>> import re + >>> re.findall(r'^ Only.*$', outdent_lines.__doc__, flags=re.MULTILINE) + [' Only outdents lines that are at least that indented.'] + >>> re.findall(r'^Only.*$', outdent_lines.__doc__, flags=re.MULTILINE) + [] + >>> re.findall(r'^ Only.*$', outdent_lines(outdent_lines.__doc__), + ... flags=re.MULTILINE) + [] + >>> re.findall(r'^Only.*$', outdent_lines(outdent_lines.__doc__), + ... 
flags=re.MULTILINE) + ['Only outdents lines that are at least that indented.'] + >>> re.findall(r'^ Only.*$', outdent_lines(outdent_lines.__doc__, 3), + ... flags=re.MULTILINE) + [' Only outdents lines that are at least that indented.'] + """ + new_docstring = [] + for line in docstring.split("\n"): + if line.startswith(" " * spaces): + new_docstring.append(line[spaces:]) + else: + new_docstring.append(line) + return "\n".join(new_docstring) + + DOCS_URL_PREFIX = _docs_url_prefix() diff --git a/CPAC/utils/extract_data.py b/CPAC/utils/extract_data.py index 6cfdcd7f88..fcabd04cd6 100644 --- a/CPAC/utils/extract_data.py +++ b/CPAC/utils/extract_data.py @@ -1,11 +1,39 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import glob import logging import os import string import sys +from typing import BinaryIO, Optional import yaml +logger = logging.getLogger("extract_data_logs") +if logger.handlers: + for handler in logger.handlers: + logger.removeHandler(handler) +logging.basicConfig( + filename=os.path.join(os.getcwd(), "extract_data_logs.log"), + filemode="w", + level=logging.DEBUG, + format="%(levelname)s %(asctime)s %(lineno)d %(message)s", +) + def extract_data(c, param_map): """ @@ -77,7 +105,7 @@ def checkTemplate(template): "where your site and subjects are present" "Please see examples" ) - logging.exception(msg) + logger.exception(msg) raise Exception(msg) filename, ext = os.path.splitext(os.path.basename(template)) @@ -85,7 +113,7 @@ def checkTemplate(template): if ext not in [".nii", ".nii.gz"]: msg = "Invalid file name", os.path.basename(template) - logging.exception(msg) + logger.exception(msg) raise Exception(msg) def get_site_list(path): @@ -98,7 +126,7 @@ def check_length(scan_name, file_name): "filename- %s is too long." "It should not be more than 30 characters." % (file_name) ) - logging.exception(msg) + logger.exception(msg) raise Exception(msg) if ( @@ -114,7 +142,7 @@ def check_length(scan_name, file_name): ) ) ) - logging.exception(msg) + logger.exception(msg) raise Exception(msg) def create_site_subject_mapping(base, relative): @@ -164,12 +192,12 @@ def getPath(template): "Anatomical Data template incorrect. No such file or directory %s", anat_base, ) - logging.exception(msg) + logger.exception(msg) raise Exception(msg) if not func_base: msg = "Functional Data template incorrect. No such file or directory %s, func_base" - logging.exception(msg) + logger.exception(msg) raise Exception(msg) if len(anat_base) != len(func_base): @@ -179,14 +207,14 @@ def getPath(template): "!=", func_base, ) - logging.exception(msg1) + logger.exception(msg1) msg2 = ( " Base length Unequal. Some sites are missing." 
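# Usage sketch for the new CPAC.utils.docs.deprecated decorator added above.
# `old_helper` and the version/explanation strings are hypothetical; the decorator
# itself (keyword arguments `version` and `explanation`) is as defined in the hunk.
from CPAC.utils.docs import deprecated

@deprecated(version="1.8.7", explanation="Use a newer helper instead.")
def old_helper(x):
    """Do something the old way."""
    return x

old_helper(1)  # emits a DeprecationWarning and prepends a ".. deprecated::" note to __doc__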
"extract_data doesn't script support this.Please" "Provide your own subjects_list file" ) - logging.exception(msg2) + logger.exception(msg2) raise Exception(msg2) # calculate the length of relative paths(path after subject directory) @@ -211,7 +239,7 @@ def check_for_sessions(relative_path, path_length): "Please provide the subjects_list file to run CPAC." "For more information refer to manual" ) - logging.exception(msg) + logger.exception(msg) raise Exception(msg) return session_present, session_path, relative_path @@ -262,12 +290,10 @@ def print_begin_of_file(sub, session_id): def print_end_of_file(sub): if param_map is not None: try: - logging.debug( - "site for sub %s -> %s" % (sub, subject_map.get(sub)) - ) - logging.debug( - "scan parameters for the above site %s" - % param_map.get(subject_map.get(sub)) + logger.debug("site for sub %s -> %s", sub, subject_map.get(sub)) + logger.debug( + "scan parameters for the above site %s", + param_map.get(subject_map.get(sub)), ) print(" scan_parameters:", file=f) print( @@ -340,10 +366,10 @@ def print_end_of_file(sub): print_end_of_file(anat_sub.split("/")[0]) else: - logging.debug("skipping subject %s" % anat_sub.split("/")[0]) + logger.debug("skipping subject %s", anat_sub.split("/")[0]) except ValueError: - logging.exception(ValueError.message) + logger.exception(ValueError.message) raise except Exception as e: @@ -352,7 +378,7 @@ def print_end_of_file(sub): "paths: \n" + str(e) ) - logging.exception(err_msg) + logger.exception(err_msg) raise Exception(err_msg) def walk(index, sub): @@ -406,21 +432,21 @@ def walk(index, sub): index, sub, os.path.join(sub, session_id), session_id ) else: - logging.debug("Skipping subject %s", sub) + logger.debug("Skipping subject %s", sub) else: - logging.debug("No sessions") + logger.debug("No sessions") session_id = "" fetch_path(index, sub, sub, session_id) except Exception: - logging.exception(Exception.message) + logger.exception(Exception.message) raise except: - err_msg = "Please make sessions are consistent across all " "subjects.\n\n" + err_msg = "Please make sessions are consistent across all subjects.\n\n" - logging.exception(err_msg) + logger.exception(err_msg) raise Exception(err_msg) try: @@ -429,17 +455,21 @@ def walk(index, sub): # check if subject is present in subject_list if subject_list: if sub in subject_list and sub not in exclusion_list: - logging.debug("extracting data for subject: %s", sub) + logger.debug("extracting data for subject: %s", sub) walk(i, sub) # check that subject is not in exclusion list elif sub not in exclusion_list and sub not in ".DS_Store": - logging.debug("extracting data for subject: %s", sub) + logger.debug("extracting data for subject: %s", sub) walk(i, sub) os.path.join(c.outputSubjectListLocation, "CPAC_subject_list.yml") + logger.info( + "Extraction Successfully Completed...Input Subjects_list for CPAC - %s", + name, + ) except Exception: - logging.exception(Exception.message) + logger.exception(Exception.message) raise finally: @@ -454,22 +484,20 @@ def generate_supplementary_files(data_config_outdir, data_config_name): import csv import os - from sets import Set - data_config_path = os.path.join(data_config_outdir, data_config_name) try: subjects_list = yaml.safe_load(open(data_config_path, "r")) except: - "\n\n[!] Data configuration file couldn't be read!\nFile " "path: {0}\n".format( + "\n\n[!] 
Data configuration file couldn't be read!\nFile path: {0}\n".format( data_config_path ) - subject_scan_set = Set() - subID_set = Set() - session_set = Set() - subject_set = Set() - scan_set = Set() + subject_scan_set = set() + subID_set = set() + session_set = set() + subject_set = set() + scan_set = set() data_list = [] try: @@ -503,8 +531,11 @@ def generate_supplementary_files(data_config_outdir, data_config_name): except TypeError: err_str = ( - "Check formatting of your anatomical/functional path " - "templates and inclusion/exclusion subjects text files" + "Subject list could not be populated!\nThis is most likely due to a" + " mis-formatting in your inclusion and/or exclusion subjects txt file or" + " your anatomical and/or functional path templates.\nCheck formatting of" + " your anatomical/functional path templates and inclusion/exclusion" + " subjects text files" ) raise TypeError(err_str) @@ -530,10 +561,7 @@ def generate_supplementary_files(data_config_outdir, data_config_name): data_config_outdir, "phenotypic_template_%s.csv" % data_config_name ) - try: - f = open(file_name, "wb") - except: - raise IOError + f = _sassy_try_open_wb(file_name) writer = csv.writer(f) @@ -543,6 +571,8 @@ def generate_supplementary_files(data_config_outdir, data_config_name): f.close() + logger.info("Template Phenotypic file for group analysis - %s", file_name) + """ # generate the phenotypic file templates for repeated measures if (len(session_set) > 1) and (len(scan_set) > 1): @@ -551,14 +581,7 @@ def generate_supplementary_files(data_config_outdir, data_config_name): '_measures_mult_sessions_and_scans_%s.csv' \ % data_config_name) - try: - f = open(file_name, 'wb') - except: - print '\n\nCPAC says: I couldn\'t save this file to your drive:\n' - print file_name, '\n\n' - print 'Make sure you have write access? Then come back. Don\'t ' \ - 'worry.. I\'ll wait.\n\n' - raise IOError + f = _sassy_try_open_wb(file_name) writer = csv.writer(f) writer.writerow(['participant', 'session', 'series', 'EV1', '..']) @@ -570,22 +593,17 @@ def generate_supplementary_files(data_config_outdir, data_config_name): f.close() - print "Template Phenotypic file for group analysis with repeated " \ - "measures (multiple sessions and scans) - %s" % file_name + logger.info( + "Template Phenotypic file for group analysis with repeated " + "measures (multiple sessions and scans) - %s", file_name + ) if (len(session_set) > 1): file_name = os.path.join(data_config_outdir, 'phenotypic_template_repeated' \ '_measures_multiple_sessions_%s.csv' % data_config_name) - try: - f = open(file_name, 'wb') - except: - print '\n\nCPAC says: I couldn\'t save this file to your drive:\n' - print file_name, '\n\n' - print 'Make sure you have write access? Then come back. Don\'t ' \ - 'worry.. 
I\'ll wait.\n\n' - raise IOError + f = _sassy_try_open_wb(file_name) writer = csv.writer(f) @@ -597,22 +615,17 @@ def generate_supplementary_files(data_config_outdir, data_config_name): f.close() - print "Template Phenotypic file for group analysis with repeated " \ - "measures (multiple sessions) - %s" % file_name + logger.info( + "Template Phenotypic file for group analysis with repeated " + "measures (multiple sessions) - %s", file_name + ) if (len(scan_set) > 1): file_name = os.path.join(data_config_outdir, 'phenotypic_template_repeated' \ '_measures_multiple_scans_%s.csv' % data_config_name) - try: - f = open(file_name, 'wb') - except: - print '\n\nCPAC says: I couldn\'t save this file to your drive:\n' - print file_name, '\n\n' - print 'Make sure you have write access? Then come back. Don\'t ' \ - 'worry.. I\'ll wait.\n\n' - raise IOError + f = _sassy_try_open_wb(file_name) writer = csv.writer(f) @@ -624,8 +637,9 @@ def generate_supplementary_files(data_config_outdir, data_config_name): f.close() - print "Template Phenotypic file for group analysis with repeated " \ - "measures (multiple scans) - %s" % file_name + logger.info("Template Phenotypic file for group analysis with repeated " + "measures (multiple scans) - %s", file_name + ) """ # generate the group analysis subject lists @@ -638,7 +652,11 @@ def generate_supplementary_files(data_config_outdir, data_config_name): for sub in sorted(subID_set): print(sub, file=f) except: - raise IOError + _sassy_oserror(file_name) + + logger.info( + "Participant list required later for group analysis - %s\n\n", file_name + ) def read_csv(csv_input): @@ -666,28 +684,44 @@ def read_csv(csv_input): ] if len(dict_labels) < 1: - msg = "Scan Parameters File is either empty" "or missing header" - logging.exception(msg) + msg = "Scan Parameters File is either empty or missing header" + logger.exception(msg) raise Exception(msg) return dict_labels except IOError: msg = "Error reading the csv file %s", csv_input - logging.exception(msg) + logger.exception(msg) raise Exception(msg) except: msg = "Error reading scan parameters csv. Make sure you are using the correct template" - logging.exception(msg) + logger.exception(msg) raise Exception(msg) -""" -Class to set dictionary keys as map attributes -""" +def _sassy_oserror(file_name: str) -> None: + """Open a file in 'wb' mode or raise a sassy OSError if a file can't be saved.""" + msg = ( + f"\n\nCPAC says: I couldn't save this file to your drive:\n {file_name}" + "\n\nMake sure you have write access? Then come back. Don't worry.. I'll" + " wait.\n\n" + ) + raise OSError(msg) + + +def _sassy_try_open_wb(file_name: str) -> Optional[BinaryIO]: + f = None + try: + f = open(file_name, "wb") + except (OSError, TypeError): + _sassy_oserror(file_name) + return f class Configuration(object): + """Set dictionary keys as map attributes.""" + def __init__(self, config_map): for key in config_map: if config_map[key] == "None": @@ -700,15 +734,9 @@ def run(data_config): Run method takes data_config file as the input argument. 
""" - root = logging.getLogger() - if root.handlers: - for handler in root.handlers: - root.removeHandler(handler) - logging.basicConfig( - filename=os.path.join(os.getcwd(), "extract_data_logs.log"), - filemode="w", - level=logging.DEBUG, - format="%(levelname)s %(asctime)s %(lineno)d %(message)s", + logger.info( + "For any errors or messages check the log file - %s", + os.path.join(os.getcwd(), "extract_data_logs.log"), ) c = Configuration(yaml.safe_load(open(os.path.realpath(data_config), "r"))) @@ -716,7 +744,7 @@ def run(data_config): if c.scanParametersCSV is not None: read_csv(c.scanParametersCSV) else: - logging.debug( + logger.debug( "no scan parameters csv included\n" "make sure you turn off slice timing correction option\n" "in CPAC configuration\n" @@ -727,6 +755,7 @@ def run(data_config): if __name__ == "__main__": if len(sys.argv) != 2: + print("Usage: python extract_data.py data_config.yml") # noqa T201 sys.exit() else: run(sys.argv[1]) diff --git a/CPAC/utils/extract_data_multiscan.py b/CPAC/utils/extract_data_multiscan.py index d24bc9a0a2..a94842ce53 100644 --- a/CPAC/utils/extract_data_multiscan.py +++ b/CPAC/utils/extract_data_multiscan.py @@ -1,9 +1,27 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import glob import os import string import yaml +from CPAC.utils.monitoring import UTLOGGER + def extract_data(c, param_map): """ @@ -83,17 +101,19 @@ def get_list(arg): # check if Template is correct def checkTemplate(template): if template.count("%s") != 2: - raise Exception( + msg = ( "Please provide '%s' in the template" "where your site and subjects are present" "Please see examples" ) + raise Exception(msg) filename, ext = os.path.splitext(os.path.basename(template)) ext = os.path.splitext(filename)[1] + ext if ext not in [".nii", ".nii.gz"]: - raise Exception("Invalid file name", os.path.basename(template)) + msg = "Invalid file name" + raise Exception(msg, os.path.basename(template)) def get_site_list(path): base = path.split("%s")[0] @@ -165,17 +185,26 @@ def getPath(template): func_base, func_relative, subject_map = getPath(c.functionalTemplate) if not anat_base: - raise Exception("Anatomical Data template incorrect") + msg = ( + f"No such file or directory {anat_base}" + "\nAnatomical Data template incorrect" + ) + raise FileNotFoundError(msg) if not func_base: - raise Exception("Functional Data template incorrect") + msg = ( + f"No such file or directory {func_base}" + "Functional Data template incorrect" + ) + raise FileNotFoundError(msg) if len(anat_base) != len(func_base): - raise Exception( - " Base length Unequal. Some sites are missing." - "extract_data doesn't script support this.Please" - "Provide your own subjects_list file" + msg = ( + f"Some sites are missing, Please check your template {anat_base} !=" + f" {func_base}\nBase length Unequal. Some sites are missing. extract_data" + " doesn't script support this. 
Please provide your own subjects_list file" ) + raise FileNotFoundError(msg) # calculate the length of relative paths(path after subject directory) func_relative_len = len(func_relative.split("/")) @@ -194,12 +223,13 @@ def check_for_sessions(relative_path, path_length): relative_path = string.join(relative_path_list[1:], "/") session_present = True elif path_length > 3: - raise Exception( + msg = ( "extract_data script currently doesn't support" "this directory structure.Please provide the" "subjects_list file to run CPAC." "For more information refer to manual" ) + raise Exception(msg) return session_present, session_path, relative_path @@ -270,6 +300,7 @@ def print_scan_param(index): " parameters csv file" % (subject_map.get(sub), scan[0]) ) + UTLOGGER.info("site for sub %s -> %s", sub, subject_map.get(sub)) print(" scan_parameters: ", file=f) print(" tr:", file=f) print_scan_param(4) @@ -364,13 +395,13 @@ def walk(index, sub): index, sub, os.path.join(sub, session_id), session_id ) else: + UTLOGGER.info("No sessions") session_id = "" fetch_path(index, sub, sub, session_id) - except Exception: - raise - except: - raise + except Exception as e: + msg = "Please make sessions are consistent across all subjects" + raise ValueError(msg) from e try: for i in range(len(anat_base)): @@ -378,12 +409,15 @@ def walk(index, sub): # check if subject is present in subject_list if subject_list: if sub in subject_list and sub not in exclusion_list: + UTLOGGER.info("extracting data for subject: %s", sub) walk(i, sub) # check that subject is not in exclusion list elif sub not in exclusion_list and sub not in ".DS_Store": + UTLOGGER.info("extracting data for subject: %s", sub) walk(i, sub) - os.path.join(c.outputSubjectListLocation, "CPAC_subject_list.yml") + name = os.path.join(c.outputSubjectListLocation, "CPAC_subject_list.yml") + UTLOGGER.info("Extraction Complete...Input Subjects_list for CPAC - %s", name) except Exception: raise finally: @@ -397,15 +431,13 @@ def generate_suplimentary_files(output_path): """ import csv - from sets import Set - subjects_list = yaml.safe_load( open(os.path.join(output_path, "CPAC_subject_list.yml"), "r") ) - subject_scan_set = Set() - subject_set = Set() - scan_set = Set() + subject_scan_set = set() + subject_set = set() + scan_set = set() data_list = [] for sub in subjects_list: @@ -456,12 +488,15 @@ def generate_suplimentary_files(output_path): f.close() + UTLOGGER.info("Template Phenotypic file for group analysis - %s", file_name) + file_name = os.path.join(output_path, "subject_list_group_analysis.txt") f = open(file_name, "w") for sub in subject_set: print(sub, file=f) + UTLOGGER.info("Subject list required later for group analysis - %s", file_name) f.close() @@ -489,9 +524,11 @@ def read_csv(csv_input): ] if len(dict_labels) < 1: - raise Exception("Scan Parameters File is either empty" "or missing header") + msg = "Scan Parameters File is either empty or missing header" + raise ValueError(msg) except: - raise + msg = "Error reading scan parameters csv" + raise ValueError(msg) return dict_labels @@ -519,6 +556,10 @@ def run(data_config): if c.scanParametersCSV is not None: s_param_map = read_csv(c.scanParametersCSV) else: + UTLOGGER.warning( + "no scan parameters csv included. 
make sure you turn off slice timing" + " correction option in CPAC configuration" + ) s_param_map = None extract_data(c, s_param_map) diff --git a/CPAC/utils/extract_parameters.py b/CPAC/utils/extract_parameters.py index 9b7843ce45..b867f3a502 100644 --- a/CPAC/utils/extract_parameters.py +++ b/CPAC/utils/extract_parameters.py @@ -1,8 +1,21 @@ +# Copyright (C) 2013-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . def merge(output_dir, scan_name, threshold, motion_f, power_f, flag): - """ - Method to merge power parameters and motion - parameters file. - """ + """Merge power parameters and motion parameters file.""" import os import re @@ -79,8 +92,6 @@ def grab(output_dir, scrubbing): import os import re - from sets import Set - pipelines = glob.glob(os.path.join(output_dir, "pipeline*")) for p in pipelines: @@ -102,8 +113,8 @@ def grab(output_dir, scrubbing): if val: threshold_list.append(val.group(0)) - scan_list = Set(scan_list) - threshold_list = Set(threshold_list) + scan_list = set(scan_list) + threshold_list = set(threshold_list) for scan in scan_list: for threshold in threshold_list: @@ -149,4 +160,4 @@ def run(output_path, scrubbing): if len(sys.argv) == 2: grab(sys.argv[1], [0]) else: - pass + print("Usage: python extract_parameters.py /path/to/output/dir") # noqa: T201 diff --git a/CPAC/utils/ga.py b/CPAC/utils/ga.py index a59d979ab0..2df25ba937 100644 --- a/CPAC/utils/ga.py +++ b/CPAC/utils/ga.py @@ -1,3 +1,19 @@ +# Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
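# The Python 2 `sets.Set` usages removed in extract_data.py, extract_data_multiscan.py,
# and extract_parameters.py above map directly onto the built-in `set` type; a minimal
# sketch with made-up scan labels.
scan_list = ["rest_run-1", "rest_run-2", "rest_run-1"]
scan_set = set(scan_list)          # was: Set(scan_list)
scan_set.add("task-flanker")
print(sorted(scan_set))            # ['rest_run-1', 'rest_run-2', 'task-flanker']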
import configparser import os import os.path as op @@ -10,6 +26,7 @@ from CPAC.info import __version__, ga_tracker +temp_dir = False udir = op.expanduser("~") if udir == "/": udir = tempfile.mkdtemp() @@ -43,24 +60,24 @@ def get_uid(): def do_it(data, timeout): try: - headers = { - "User-Agent": "C-PAC/{} (https://fcp-indi.github.io)".format(__version__) - } - return requests.post( + headers = {"User-Agent": f"C-PAC/{__version__} (https://fcp-indi.github.io)"} + _done = requests.post( "https://www.google-analytics.com/collect", data=data, timeout=timeout, headers=headers, ) except: - return False - if temp_dir: - try: - os.remove(tracking_path) - os.rmdir(udir) - except: - pass - return None + _done = False + finally: + if temp_dir: + try: + os.remove(tracking_path) + os.rmdir(udir) + except (TypeError, OSError) as e: + msg = f"Unable to delete temporary tracking path {tracking_path}." + raise OSError(msg) from e + return _done def track_event( diff --git a/CPAC/utils/interfaces/ants.py b/CPAC/utils/interfaces/ants.py index 5fbd41c810..93dbd7a4eb 100644 --- a/CPAC/utils/interfaces/ants.py +++ b/CPAC/utils/interfaces/ants.py @@ -229,7 +229,8 @@ class ResampleImageBySpacing(ANTSCommand): def _format_arg(self, name, trait_spec, value): if name == "out_spacing": if len(value) != self.inputs.dimension: - raise ValueError("out_spacing dimensions should match dimension") + msg = "out_spacing dimensions should match dimension" + raise ValueError(msg) value = " ".join(["%d" % d for d in value]) diff --git a/CPAC/utils/interfaces/brickstat.py b/CPAC/utils/interfaces/brickstat.py index 513786bf88..1aac6d5331 100644 --- a/CPAC/utils/interfaces/brickstat.py +++ b/CPAC/utils/interfaces/brickstat.py @@ -1,6 +1,5 @@ import os -from nipype import logging from nipype.interfaces.afni.base import ( AFNICommandBase, ) @@ -12,8 +11,6 @@ ) from nipype.utils.filemanip import load_json, save_json -iflogger = logging.getLogger("nipype.interface") - class BrickStatInputSpec(CommandLineInputSpec): in_file = File( diff --git a/CPAC/utils/interfaces/datasink.py b/CPAC/utils/interfaces/datasink.py index e87bc2f4db..91a011f596 100644 --- a/CPAC/utils/interfaces/datasink.py +++ b/CPAC/utils/interfaces/datasink.py @@ -5,20 +5,19 @@ from shutil import SameFileError import time -from nipype import config, logging -from nipype.interfaces.base import Undefined, isdefined, traits +from nipype import config +from nipype.interfaces.base import isdefined, traits, Undefined from nipype.interfaces.io import ( + copytree, DataSinkInputSpec, DataSinkOutputSpec, IOBase, ProgressPercentage, - copytree, ) from nipype.utils.filemanip import copyfile, ensure_list from nipype.utils.misc import str2bool -iflogger = logging.getLogger("nipype.interface") - +from CPAC.utils.monitoring import FMLOGGER, IFLOGGER RETRY = 5 RETRY_WAIT = 5 @@ -170,7 +169,7 @@ def _substitute(self, pathstr): oldpathstr = pathstr pathstr = pathstr.replace(key, val) if pathstr != oldpathstr: - iflogger.debug( + IFLOGGER.debug( "sub.str: %s -> %s using %r -> %r", oldpathstr, pathstr, @@ -182,7 +181,7 @@ def _substitute(self, pathstr): oldpathstr = pathstr pathstr, _ = re.subn(key, val, pathstr) if pathstr != oldpathstr: - iflogger.debug( + IFLOGGER.debug( "sub.regexp: %s -> %s using %r -> %r", oldpathstr, pathstr, @@ -190,7 +189,7 @@ def _substitute(self, pathstr): val, ) if pathstr_ != pathstr: - iflogger.info("sub: %s -> %s", pathstr_, pathstr) + IFLOGGER.info("sub: %s -> %s", pathstr_, pathstr) return pathstr # Check for s3 in base directory @@ -319,7 +318,7 @@ def 
_fetch_bucket(self, bucket_name): import boto3 import botocore except ImportError: - err_msg = "Boto3 package is not installed - install boto3 and " "try again." + err_msg = "Boto3 package is not installed - install boto3 and try again." raise Exception(err_msg) # Init variables @@ -338,7 +337,7 @@ def _fetch_bucket(self, bucket_name): # Try and get AWS credentials if a creds_path is specified if aws_access_key_id and aws_secret_access_key: # Init connection - iflogger.info( + IFLOGGER.info( "Connecting to S3 bucket: %s with credentials...", bucket_name ) # Use individual session for each instance of DataSink @@ -350,7 +349,7 @@ def _fetch_bucket(self, bucket_name): ) else: - iflogger.info("Connecting to S3 bucket: %s with IAM role...", bucket_name) + IFLOGGER.info("Connecting to S3 bucket: %s with IAM role...", bucket_name) # Lean on AWS environment / IAM role authentication and authorization session = boto3.session.Session() @@ -366,7 +365,7 @@ def _fetch_bucket(self, bucket_name): "choose-signer.s3.*", botocore.handlers.disable_signing ) - iflogger.info("Connecting to AWS: %s anonymously...", bucket_name) + IFLOGGER.info("Connecting to AWS: %s anonymously...", bucket_name) _get_head_bucket(s3_resource, bucket_name) # Explicitly declare a secure SSL connection for bucket object @@ -419,16 +418,16 @@ def _upload_to_s3(self, bucket, src, dst): src_md5 = hashlib.md5(src_read).hexdigest() # Move to next loop iteration if dst_md5 == src_md5: - iflogger.info("File %s already exists on S3, skipping...", dst_f) + FMLOGGER.info("File %s already exists on S3, skipping...", dst_f) continue else: - iflogger.info("Overwriting previous S3 file...") + FMLOGGER.info("Overwriting previous S3 file...") except ClientError: - iflogger.info("New file to S3") + FMLOGGER.info("New file to S3") # Copy file up to S3 (either encrypted or not) - iflogger.info( + FMLOGGER.info( "Uploading %s to S3 bucket, %s, as %s...", src_f, bucket.name, dst_f ) if self.inputs.encrypt_bucket_keys: @@ -492,7 +491,7 @@ def _list_outputs(self): ) outdir = local_out_exception # Log local copying directory - iflogger.info( + FMLOGGER.info( "Access to S3 failed! 
Storing outputs locally at: " "%s\nError: %s", outdir, @@ -523,7 +522,7 @@ def _list_outputs(self): for key, files in list(self.inputs._outputs.items()): if not isdefined(files): continue - iflogger.debug("key: %s files: %s", key, str(files)) + IFLOGGER.debug("key: %s files: %s", key, str(files)) files = ensure_list(files if files else []) tempoutdir = outdir if s3_flag: @@ -574,7 +573,7 @@ def _list_outputs(self): if (not os.path.exists(dst)) or (os.stat(src) != os.stat(dst)): # If src is a file, copy it to dst if os.path.isfile(src): - iflogger.debug(f"copyfile: {src} {dst}") + FMLOGGER.debug(f"copyfile: {src} {dst}") copyfile( src, dst, @@ -586,13 +585,13 @@ def _list_outputs(self): # entire contents to dst dir elif os.path.isdir(src): if os.path.exists(dst) and self.inputs.remove_dest_dir: - iflogger.debug("removing: %s", dst) + FMLOGGER.debug("removing: %s", dst) shutil.rmtree(dst) - iflogger.debug("copydir: %s %s", src, dst) + FMLOGGER.debug("copydir: %s %s", src, dst) copytree(src, dst) out_files.append(dst) except SameFileError: - iflogger.debug(f"copyfile (same file): {src} {dst}") + FMLOGGER.debug(f"copyfile (same file): {src} {dst}") # Return outputs dictionary outputs["out_file"] = out_files diff --git a/CPAC/utils/interfaces/fsl.py b/CPAC/utils/interfaces/fsl.py index f08a45db0b..3a4f24e5ae 100644 --- a/CPAC/utils/interfaces/fsl.py +++ b/CPAC/utils/interfaces/fsl.py @@ -11,7 +11,8 @@ class Merge(fslMerge): def _format_arg(self, name, spec, value): if name == "tr": if self.inputs.dimension != "t": - raise ValueError("When TR is specified, dimension must be t") + msg = "When TR is specified, dimension must be t" + raise ValueError(msg) return spec.argstr % value if name == "dimension": if isdefined(self.inputs.tr): diff --git a/CPAC/utils/interfaces/function/function.py b/CPAC/utils/interfaces/function/function.py index 21178c2b2f..be619f5f17 100644 --- a/CPAC/utils/interfaces/function/function.py +++ b/CPAC/utils/interfaces/function/function.py @@ -1,42 +1,158 @@ +# from https://github.com/nipy/nipype/blob/0.13.1/nipype/interfaces/utility/wrappers.py + +# CHANGES: +# * Adds `as_module` argument and property +# * Adds `sig_imports` decorator +# * Automatically imports global Nipype loggers in function nodes + +# ORIGINAL WORK'S ATTRIBUTION NOTICE: +# Copyright (c) 2009-2016, Nipype developers + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Prior to release 0.12, Nipype was licensed under a BSD license. + +# Modifications Copyright (C) 2018-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Interface for wrapping Python functions. + +Like the built-in nipype Function interace, except includes +- `as_module` to allow module.function name +- `sig_imports` to set necessary imports on function nodes with a decorator +""" +from ast import FunctionDef, parse from builtins import bytes, str +from importlib import import_module import inspect -from typing import Callable, List +from typing import Callable, Optional -from nipype import logging from nipype.interfaces.base import ( - BaseInterfaceInputSpec, - DynamicTraitedSpec, - Undefined, isdefined, - traits, ) -from nipype.interfaces.io import IOBase, add_traits +from nipype.interfaces.io import add_traits, IOBase +from nipype.interfaces.utility.wrappers import Function as NipypeFunction from nipype.utils.filemanip import ensure_list -from nipype.utils.functions import create_function_from_source, getsource +from nipype.utils.functions import getsource + +from CPAC.utils.docs import outdent_lines +from CPAC.utils.typing import LIST, TUPLE + +_AUTOLOGGING_IMPORTS = [ + "from CPAC.utils.monitoring.custom_logging import FMLOGGER, IFLOGGER, UTLOGGER," + " WFLOGGER" +] -iflogger = logging.getLogger("nipype.interface") +def _as_module(fxn: str, ns: dict) -> TUPLE[str, dict]: + """Get full module name and namespace.""" + module = inspect.getmodule(fxn).__name__ + return f"{module}.{fxn.__name__}", _module_imports(module, ns, fxn.__name__) -class FunctionInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): - function_str = traits.Str(mandatory=True, desc="code for function") +def get_function_name_from_source(function_source: str) -> str: + r"""Get the name of a function from its source code. -class Function(IOBase): - """Runs arbitrary function as an interface. + Parameters + ---------- + function_source: str + The source code of the function. + + Returns + ------- + str + The name of the function. Examples -------- - >>> func = 'def func(arg1, arg2=5): return arg1 + arg2' - >>> fi = Function(input_names=['arg1', 'arg2'], output_names=['out']) - >>> fi.inputs.function_str = func - >>> res = fi.run(arg1=1) - >>> res.outputs.out - 6 + >>> get_function_name_from_source("def fake_function():\n return") + 'fake_function' + >>> get_function_name_from_source("not a def") + Traceback (most recent call last): + ... + ValueError: No function definition found in the provided source. + >>> get_function_name_from_source("class FakeClass:\n pass") + Traceback (most recent call last): + ... + ValueError: No function definition found in the provided source. + """ + value_error = ValueError("No function definition found in the provided source.") + try: + for node in parse(function_source).body: + if isinstance(node, FunctionDef): + return node.name + except SyntaxError as syntax_error: + raise value_error from syntax_error + raise value_error + + +def create_function_from_source( + function_source: str, imports: Optional[LIST[str]] = None, ns: Optional[dict] = None +): + """Return a function object from a function source. 
+ Parameters + ---------- + function_source : unicode string + unicode string defining a function + imports : list of strings + list of import statements in string form that allow the function + to be executed in an otherwise empty namespace + ns : dict + namespace dictionary """ + if ns is None: + ns = {} + import_keys = [] + try: + if imports is not None: + for statement in imports: + exec(statement, ns) + import_keys = list(ns.keys()) + exec(function_source, ns) - input_spec = FunctionInputSpec - output_spec = DynamicTraitedSpec + except Exception as e: + msg = f"Error executing function\n{function_source}\n" + msg += ( + "Functions in connection strings have to be standalone. " + "They cannot be declared either interactively or inside " + "another function or inline in the connect string. Any " + "imports should be done inside the function." + ) + raise RuntimeError(msg) from e + ns_funcs = list(set(ns) - {*import_keys, "__builtins__"}) + assert len(ns_funcs) == 1, "Function or inputs are ill-defined" + return ns[ns_funcs[0]] + + +class Function(NipypeFunction): + """Can automatically set a module name on the interface. + + Automatically imports global Nipype loggers. + """ def __init__( self, @@ -47,7 +163,7 @@ def __init__( as_module=False, **inputs, ): - """ + """Initialize a :py:func`~CPAC.utils.interfaces.function.Function` interface. Parameters ---------- @@ -68,7 +184,10 @@ def __init__( decorator, the imports given as a parameter here will take precedence over those from the decorator. """ - super().__init__(**inputs) + super(IOBase, self).__init__(**inputs) + ns = {} + if imports is None: + imports = [] if function: if hasattr(function, "ns_imports"): # prepend the ns_imports from the decorator to @@ -77,47 +196,49 @@ def __init__( "from CPAC.utils.interfaces.function import Function", *function.ns_imports, ] - imports = _ns_imports if imports is None else [*_ns_imports, *imports] + imports = _ns_imports if not imports else [*_ns_imports, *imports] if as_module: - module = inspect.getmodule(function).__name__ - full_name = "%s.%s" % (module, function.__name__) - self.inputs.function_str = full_name + self.inputs.function_str, ns = _as_module(function, ns) elif hasattr(function, "__call__"): try: self.inputs.function_str = getsource(function) - except IOError: - raise Exception( + except IOError as os_error: + msg = ( "Interface Function does not accept " "function objects defined interactively " "in a python session" ) + raise ValueError(msg) from os_error else: if input_names is None: fninfo = function.__code__ elif isinstance(function, (str, bytes)): self.inputs.function_str = function if input_names is None: - fninfo = create_function_from_source(function, imports).__code__ + fninfo = create_function_from_source(function, imports, ns).__code__ else: - raise Exception("Unknown type of function") + msg = "Unknown type of function" + raise TypeError(msg) if input_names is None: - input_names = fninfo.co_varnames[: fninfo.co_argcount] + try: + input_names = fninfo.co_varnames[: fninfo.co_argcount] + except NameError: + input_names = [] self.as_module = as_module self.inputs.on_trait_change(self._set_function_string, "function_str") self._input_names = ensure_list(input_names) self._output_names = ensure_list(output_names) add_traits(self.inputs, list(self._input_names)) - self.imports = imports + self.imports = [*imports, *_AUTOLOGGING_IMPORTS] self._out = {} for name in self._output_names: self._out[name] = None @staticmethod - def sig_imports(imports: List[str]) -> Callable: 
- """ - Sets an ``ns_imports`` attribute on a function for - Function-node functions. + def sig_imports(imports: LIST[str]) -> Callable: + """Set an ``ns_imports`` attribute on a function for Function-node functions. + This can be useful for classes needed for decorators, typehints and for avoiding redefinitions. @@ -144,14 +265,9 @@ def sig_imports(imports: List[str]) -> Callable: ... output_names=['out_file'], ... function=calculate_FD_J, ... as_module=True) - >>> calc_fdj.imports # doctest: +NORMALIZE_WHITESPACE - ['from CPAC.utils.interfaces.function import Function', - 'import os', - 'import sys', - 'from typing import Optional', - 'import numpy as np', - 'from CPAC.utils.pytest import skipif', - 'from CPAC.utils.typing import LITERAL, TUPLE'] + >>> calc_fdj.imports == ["from CPAC.utils.interfaces.function import Function", + ... *calculate_FD_J.ns_imports, *_AUTOLOGGING_IMPORTS] + True >>> from inspect import signature >>> from nipype.utils.functions import (getsource, ... create_function_from_source) @@ -169,16 +285,15 @@ def _imports(func: Callable) -> Callable: def _set_function_string(self, obj, name, old, new): if name == "function_str": + ns = {} if self.as_module: - module = inspect.getmodule(new).__name__ - full_name = "%s.%s" % (module, new.__name__) - self.inputs.function_str = full_name + self.inputs.function_str, ns = _as_module(new, ns) elif hasattr(new, "__call__"): function_source = getsource(new) fninfo = new.__code__ elif isinstance(new, (str, bytes)): function_source = new - fninfo = create_function_from_source(new, self.imports).__code__ + fninfo = create_function_from_source(new, self.imports, ns).__code__ self.inputs.trait_set( trait_change_notify=False, **{"%s" % name: function_source} ) @@ -188,32 +303,24 @@ def _set_function_string(self, obj, name, old, new): add_traits(self.inputs, list(new_names)) self._input_names.extend(new_names) - def _add_output_traits(self, base): - undefined_traits = {} - for key in self._output_names: - base.add_trait(key, traits.Any) - undefined_traits[key] = Undefined - base.trait_set(trait_change_notify=False, **undefined_traits) - return base - def _run_interface(self, runtime): # Create function handle + ns = {} if self.as_module: - import importlib - pieces = self.inputs.function_str.split(".") module = ".".join(pieces[:-1]) function = pieces[-1] + ns = _module_imports(module, ns, function) try: - function_handle = getattr(importlib.import_module(module), function) - except ImportError: - raise RuntimeError( - "Could not import module: %s" % self.inputs.function_str + function_str = inspect.getsource( + getattr(import_module(module), function) ) + except ImportError as import_error: + msg = f"Could not import module: {self.inputs.function_str}" + raise RuntimeError(msg) from import_error else: - function_handle = create_function_from_source( - self.inputs.function_str, self.imports - ) + function_str = self.inputs.function_str + function_handle = create_function_from_source(function_str, self.imports, ns) # Get function args args = {} @@ -221,22 +328,30 @@ def _run_interface(self, runtime): value = getattr(self.inputs, name) if isdefined(value): args[name] = value - out = function_handle(**args) if len(self._output_names) == 1: self._out[self._output_names[0]] = out else: if isinstance(out, tuple) and (len(out) != len(self._output_names)): - raise RuntimeError("Mismatch in number of expected outputs") + msg = "Mismatch in number of expected outputs" + raise RuntimeError(msg) - else: - for idx, name in 
enumerate(self._output_names): - self._out[name] = out[idx] + for idx, name in enumerate(self._output_names): + self._out[name] = out[idx] return runtime - def _list_outputs(self): - outputs = self._outputs().get() - for key in self._output_names: - outputs[key] = self._out[key] - return outputs + +Function.__doc__ = "\n\n".join( + [NipypeFunction.__doc__.rstrip(), outdent_lines(Function.__doc__)] +) + + +def _module_imports(module: str, ns: dict, fxn: str) -> dict: + """Import module-level imports to a namespace.""" + exec(f"from {module} import *", ns) + try: + exec(f"del {fxn}", ns) # We'll redefine the function itself... + except NameError: + pass # ...unless the function isn't defined in a module + return ns diff --git a/CPAC/utils/interfaces/masktool.py b/CPAC/utils/interfaces/masktool.py index 5993cc3b59..1d386cf7bb 100644 --- a/CPAC/utils/interfaces/masktool.py +++ b/CPAC/utils/interfaces/masktool.py @@ -1,9 +1,6 @@ -from nipype import logging from nipype.interfaces.afni.base import AFNICommand, AFNICommandInputSpec from nipype.interfaces.base import File, InputMultiPath, Str, TraitedSpec, traits -iflogger = logging.getLogger("nipype.interface") - class MaskToolInputSpec(AFNICommandInputSpec): in_files = InputMultiPath( diff --git a/CPAC/utils/interfaces/netcorr.py b/CPAC/utils/interfaces/netcorr.py index c5b5bc52e7..0a410aad01 100644 --- a/CPAC/utils/interfaces/netcorr.py +++ b/CPAC/utils/interfaces/netcorr.py @@ -34,7 +34,7 @@ class NetCorrInputSpec(AFNICommandInputSpec): ) automask_off = Bool( False, - desc="If you want to neither put in a mask " "*nor* have the automasking occur", + desc="If you want to neither put in a mask *nor* have the automasking occur", argstr="-automask_off", usedefault=True, ) diff --git a/CPAC/utils/interfaces/pc.py b/CPAC/utils/interfaces/pc.py index 91deb20420..574036b154 100644 --- a/CPAC/utils/interfaces/pc.py +++ b/CPAC/utils/interfaces/pc.py @@ -1,9 +1,6 @@ -from nipype import logging from nipype.interfaces.afni.base import AFNICommand, AFNICommandInputSpec from nipype.interfaces.base import File, TraitedSpec, traits -iflogger = logging.getLogger("nipype.interface") - class PCInputSpec(AFNICommandInputSpec): in_file = File( diff --git a/CPAC/utils/interfaces/tests/test_function.py b/CPAC/utils/interfaces/tests/test_function.py new file mode 100644 index 0000000000..dc731956b7 --- /dev/null +++ b/CPAC/utils/interfaces/tests/test_function.py @@ -0,0 +1,50 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Test Function interface.""" +from pytest import mark, raises + +from CPAC.utils.interfaces.function.function import Function + + +def faux_fxn(_loggers: bool = True): + """Require autoassignment (for testing).""" + if _loggers: + return WFLOGGER, IFLOGGER # noqa: F821 + return luigi_mario # noqa: F821 + + +@mark.parametrize("as_module", [True, False]) +def test_autologger(as_module: bool) -> None: + """Test autoassignment of global Nipype loggers`.""" + interface = Function( + function=faux_fxn, input_names=["_loggers"], as_module=as_module + ) + interface.inputs._loggers = False + with raises(NameError) as name_error: + interface.run() + assert "name 'luigi_mario' is not defined" in str(name_error.value) + + interface = Function( + function=faux_fxn, + input_names=["_loggers"], + output_names=["logger", "iflogger"], + as_module=as_module, + ) + interface.inputs._loggers = True + res = interface.run() + assert res.outputs.logger.name == "nipype.workflow" + assert res.outputs.iflogger.name == "nipype.interface" diff --git a/CPAC/utils/monitoring/__init__.py b/CPAC/utils/monitoring/__init__.py index d9b5f4b33a..8be35228e1 100644 --- a/CPAC/utils/monitoring/__init__.py +++ b/CPAC/utils/monitoring/__init__.py @@ -1,22 +1,48 @@ -"""Module to customize Nipype's process monitoring for use in C-PAC. +# Copyright (C) 2021-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Customize Nipype's process monitoring for use in C-PAC. See https://fcp-indi.github.io/docs/developer/nodes for C-PAC-specific documentation. See https://nipype.readthedocs.io/en/latest/api/generated/nipype.utils.profiler.html for Nipype's documentation. """ # pylint: disable=line-too-long from .config import LOGTAIL, WARNING_FREESURFER_OFF_WITH_DATA -from .custom_logging import failed_to_start, getLogger, set_up_logger +from .custom_logging import ( + failed_to_start, + FMLOGGER, + getLogger, + IFLOGGER, + set_up_logger, + UTLOGGER, + WFLOGGER, +) from .monitoring import ( - LoggingHTTPServer, - LoggingRequestHandler, log_nodes_cb, log_nodes_initial, + LoggingHTTPServer, + LoggingRequestHandler, monitor_server, recurse_nodes, ) __all__ = [ "failed_to_start", + "FMLOGGER", "getLogger", + "IFLOGGER", "LoggingHTTPServer", "LoggingRequestHandler", "log_nodes_cb", @@ -25,5 +51,7 @@ "monitor_server", "recurse_nodes", "set_up_logger", + "UTLOGGER", "WARNING_FREESURFER_OFF_WITH_DATA", + "WFLOGGER", ] diff --git a/CPAC/utils/monitoring/custom_logging.py b/CPAC/utils/monitoring/custom_logging.py index 812dcbaf13..edd1d68ce6 100644 --- a/CPAC/utils/monitoring/custom_logging.py +++ b/CPAC/utils/monitoring/custom_logging.py @@ -1,22 +1,20 @@ -"""Funtions for logging. +# Copyright (C) 2022-2023 C-PAC Developers -Copyright (C) 2022-2023 C-PAC Developers +# This file is part of C-PAC. -This file is part of C-PAC. 
+# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. -C-PAC is free software: you can redistribute it and/or modify it under -the terms of the GNU Lesser General Public License as published by the -Free Software Foundation, either version 3 of the License, or (at your -option) any later version. +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. -C-PAC is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with C-PAC. If not, see . -""" +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Funtions for logging.""" import logging import os import subprocess @@ -290,3 +288,10 @@ def set_up_logger( handler = logging.FileHandler(filepath) logger.addHandler(handler) return logger + + +# Nipype built-in loggers +IFLOGGER = getLogger("nipype.interface") +FMLOGGER = getLogger("nipype.filemanip") +UTLOGGER = getLogger("nipype.utils") +WFLOGGER = getLogger("nipype.workflow") diff --git a/CPAC/utils/monitoring/draw_gantt_chart.py b/CPAC/utils/monitoring/draw_gantt_chart.py index 6ebb3645cb..546551de7d 100644 --- a/CPAC/utils/monitoring/draw_gantt_chart.py +++ b/CPAC/utils/monitoring/draw_gantt_chart.py @@ -97,7 +97,7 @@ def create_event_dict(start_time, nodes_list): # Populate dictionary if events.get(start_delta): - err_msg = "Event logged twice or events started at exact same " "time!" + err_msg = "Event logged twice or events started at exact same time!" warn(str(KeyError(err_msg)), category=Warning) events[start_delta] = start_node events[finish_delta] = finish_node @@ -418,7 +418,7 @@ def generate_gantt_chart( html_string += ( "

<p>Finish: " + last_node["finish"].strftime("%Y-%m-%d %H:%M:%S") + "</p>" ) - html_string += "<p>Duration: " + "{0:.2f}".format(duration / 60) + " minutes</p>" + html_string += "<p>Duration: " + f"{duration / 60:.2f}" + " minutes</p>" html_string += "<p>Nodes: " + str(len(nodes_list)) + "</p>" html_string += "<p>Cores: " + str(cores) + "</p>
" html_string += close_header @@ -654,7 +654,8 @@ def _timing_timestamp(node): dict """ if node is None or node.items() is None: - raise ProcessLookupError("No logged nodes have timing information.") + msg = "No logged nodes have timing information." + raise ProcessLookupError(msg) return { k: ( datetime.strptime(v, "%Y-%m-%dT%H:%M:%S.%f") diff --git a/CPAC/utils/ndmg_utils.py b/CPAC/utils/ndmg_utils.py index 11ad7f002f..0623118e75 100644 --- a/CPAC/utils/ndmg_utils.py +++ b/CPAC/utils/ndmg_utils.py @@ -1,44 +1,48 @@ -""" -Functions in this file adapted from NeuroData group: -STATEMENT OF CHANGES: - This file is derived from sources licensed under the Apache-2.0 terms, - and this file has been changed. +# Functions in this file adapted from NeuroData group: -CHANGES: - * Minor refactoring for compatibility with C-PAC +# STATEMENT OF CHANGES: +# This file is derived from sources licensed under the Apache-2.0 terms, +# and this file has been changed. -ORIGINAL WORK'S ATTRIBUTION NOTICE: - Copyright 2016 NeuroData (http://neurodata.io) +# CHANGES: +# * Minor refactoring for compatibility with C-PAC - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +# ORIGINAL WORK'S ATTRIBUTION NOTICE: +# Copyright 2016 NeuroData (http://neurodata.io) - http://www.apache.org/licenses/LICENSE-2.0 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. - graph.py - Created by Greg Kiar on 2016-01-27. - Email: gkiar@jhu.edu +# graph.py +# Created by Greg Kiar on 2016-01-27. +# Email: gkiar@jhu.edu -Can be found here: - https://github.com/neurodata/m2g/blob/v0.1.0/ndmg/graph/graph.py +# Can be found here: +# https://github.com/neurodata/m2g/blob/v0.1.0/ndmg/graph/graph.py -Modifications Copyright (C) 2022 C-PAC Developers +# Modifications Copyright (C) 2022-2024 C-PAC Developers -This file is part of C-PAC. -""" +# This file is part of C-PAC. +from logging import basicConfig, INFO import os import numpy as np import nibabel as nib +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("nuerodata.m2g.ndmg") +basicConfig(format="%(message)s", level=INFO) + def ndmg_roi_timeseries(func_file, label_file): """ @@ -75,7 +79,7 @@ def ndmg_roi_timeseries(func_file, label_file): err = ( "\n[!] 
Error: functional data and ROI mask may not be in " "the same space or be the same size.\nDetails: " - "{0}".format(e) + f"{e}" ) raise IndexError(err) # take the mean for the voxel timeseries, and ignore voxels with @@ -146,13 +150,15 @@ def make_graph(self, streamlines, attr=None): ecount=0, vcount=len(self.n_ids), ) + logger.info(self.g.graph) [str(self.g.add_node(ids)) for ids in self.n_ids] nlines = np.shape(streamlines)[0] + logger.info("# of Streamlines: %s", nlines) print_id = np.max((int(nlines * 0.05), 1)) # in case nlines*.05=0 for idx, streamline in enumerate(streamlines): if (idx % print_id) == 0: - pass + logger.info(idx) points = np.round(streamline).astype(int) p = set() @@ -183,8 +189,9 @@ def cor_graph(self, timeseries, attr=None): """ import numpy as np - timeseries[0] + ts = timeseries[0] # noqa: F841 rois = timeseries[1] + logger.info("Estimating correlation matrix for %s ROIs...", self.N) self.g = np.abs(np.corrcoef(timeseries)) # calculate pearson correlation self.g = np.nan_to_num(self.g).astype(object) self.n_ids = rois @@ -203,6 +210,7 @@ def get_graph(self): try: return self.g except AttributeError: + logger.error("The graph has not yet been defined.") pass def as_matrix(self): @@ -235,12 +243,14 @@ def save_graph(self, graphname): header=",".join([str(n) for n in self.n_ids]), ) else: - raise ValueError("Unsupported Modality.") - pass + msg = "Unsupported Modality." + raise ValueError(msg) def summary(self): """User friendly wrapping and display of graph properties.""" - pass + import networkx as nx + + logger.info("\n Graph Summary: %s", nx.info(self.g)) def ndmg_create_graphs(ts, labels): diff --git a/CPAC/utils/nifti_utils.py b/CPAC/utils/nifti_utils.py index cab42b51cb..2f4e0e7cb3 100644 --- a/CPAC/utils/nifti_utils.py +++ b/CPAC/utils/nifti_utils.py @@ -1,14 +1,31 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Utlities for NIfTI images.""" import os +from typing import Union import numpy as np -import six import nibabel as nib -def nifti_image_input(image): - """ - Test if an input is a path or a nifti.image and the image loaded through - nibabel. +def nifti_image_input( + image: Union[str, nib.nifti1.Nifti1Image], +) -> nib.nifti1.Nifti1Image: + """Test if an input is a path or a nifti.image and the image loaded through nibabel. Parameters. ---------- @@ -23,19 +40,20 @@ def nifti_image_input(image): """ if isinstance(image, nib.nifti1.Nifti1Image): img = image - elif isinstance(image, six.string_types): + elif isinstance(image, str): if not os.path.exists(image): - raise ValueError(str(image) + " does not exist.") - else: - img = nib.load(image) + msg = f"{image} does not exist." 
+ raise FileNotFoundError(msg) + img = nib.load(image) else: - raise TypeError("Image can be either a string or a nifti1.Nifti1Image") + msg = "Image can be either a string or a nifti1.Nifti1Image" + raise TypeError(msg) return img def more_zeros_than_ones(image): """ - Return True is there is more zeros than other values in a given nifti image. + Return True if there are more zeros than other values in a given nifti image. Parameters. ---------- @@ -47,16 +65,7 @@ def more_zeros_than_ones(image): ------- more_zeros : boolean """ - if isinstance(image, nib.nifti1.Nifti1Image): - img = image - elif isinstance(image, six.string_types): - if not os.path.exists(image): - raise ValueError(str(image) + " does not exist.") - else: - img = nib.load(image) - else: - raise TypeError("Image can be either a string or a nifti1.Nifti1Image") - + img = nifti_image_input(image) data = img.get_fdata() nb_zeros = len(np.where(data == 0)[0]) size = data.size @@ -77,16 +86,7 @@ def inverse_nifti_values(image): ------- output : Nibabel Nifti1Image """ - if isinstance(image, nib.nifti1.Nifti1Image): - img = image - elif isinstance(image, six.string_types): - if not os.path.exists(image): - raise ValueError(str(image) + " does not exist.") - else: - img = nib.load(image) - else: - raise TypeError("Image can be either a string or a nifti1.Nifti1Image") - + img = nifti_image_input(image) data = img.get_fdata() zeros = np.where(data) out_data = np.ones(data.shape) diff --git a/CPAC/utils/outputs.py b/CPAC/utils/outputs.py index d814564ef1..72d95244d3 100644 --- a/CPAC/utils/outputs.py +++ b/CPAC/utils/outputs.py @@ -11,7 +11,7 @@ class Outputs: except Exception as e: err = ( "\n[!] Could not access or read the cpac_outputs.tsv " - "resource file:\n{0}\n\nError details {1}\n".format(reference_csv, e) + f"resource file:\n{reference_csv}\n\nError details {e}\n" ) raise Exception(err) diff --git a/CPAC/utils/sklearn.py b/CPAC/utils/sklearn.py new file mode 100644 index 0000000000..8f9077b5ec --- /dev/null +++ b/CPAC/utils/sklearn.py @@ -0,0 +1,58 @@ +# New BSD License + +# Copyright (c) 2007–2018 The scikit-learn developers. +# All rights reserved. +# Modifications copyright (c) 2019-2024 C-PAC Developers. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: + +# a. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# b. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# c. Neither the name of the Scikit-learn Developers nor the names of +# its contributors may be used to endorse or promote products +# derived from this software without specific prior written +# permission. + + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +# DAMAGE. +"""Code from scikit-learn (https://github.com/scikit-learn/scikit-learn).""" +import numbers +from typing import Union + +import numpy as np +from numpy.random import RandomState + + +def check_random_state(seed: Union[None, int, RandomState]) -> RandomState: + """Turn seed into a np.random.RandomState instance. + + Parameters + ---------- + seed : None | int | instance of RandomState + If seed is None, return the RandomState singleton used by np.random. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState instance, return it. + Otherwise raise ValueError. + """ + if seed is None or seed is np.random: + return np.random.mtrand._rand + if isinstance(seed, (numbers.Integral, np.integer)): + return np.random.RandomState(seed) + if isinstance(seed, np.random.RandomState): + return seed + msg = f"{seed!r} cannot be used to seed a numpy.random.RandomState instance" + raise ValueError(msg) diff --git a/CPAC/utils/strategy.py b/CPAC/utils/strategy.py index 4423a5cd4c..67f4de5770 100644 --- a/CPAC/utils/strategy.py +++ b/CPAC/utils/strategy.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2022 C-PAC Developers +# Copyright (C) 2018-2024 C-PAC Developers # This file is part of C-PAC. @@ -14,13 +14,9 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -import logging - -import six from CPAC.pipeline.engine import ResourcePool - -logger = logging.getLogger("nipype.workflow") +from CPAC.utils.monitoring import WFLOGGER class Strategy: @@ -55,7 +51,7 @@ def get_node_from_resource_pool(self, resource_key): try: return self.resource_pool[resource_key] except: - logger.error("No node for output: %s", resource_key) + WFLOGGER.error("No node for output: %s", resource_key) raise @property @@ -81,15 +77,15 @@ def get(self, resource_key): return self.resource_pool.get(resource_key) def __getitem__(self, resource_key): - assert isinstance(resource_key, six.string_types) + assert isinstance(resource_key, str) try: return self.resource_pool[resource_key] except: - logger.error("No node for output: %s", resource_key) + WFLOGGER.error("No node for output: %s", resource_key) raise def __contains__(self, resource_key): - assert isinstance(resource_key, six.string_types) + assert isinstance(resource_key, str) return resource_key in self.resource_pool def fork(self): diff --git a/CPAC/utils/test_init.py b/CPAC/utils/test_init.py index d3d355269b..853756fb22 100644 --- a/CPAC/utils/test_init.py +++ b/CPAC/utils/test_init.py @@ -1,17 +1,32 @@ # CPAC/utils/test_init.py + +# Copyright (C) 2015-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. 
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . # # Contributing authors (please append): # Daniel Clark # Jon Clucas -""" -This module contains functions that assist in initializing CPAC -tests resources. -""" +"""Assist in initializing CPAC tests resources.""" from typing import Optional from nipype.interfaces.utility import IdentityInterface from CPAC.pipeline.nipype_pipeline_engine import Node +from CPAC.utils.monitoring import UTLOGGER from CPAC.utils.typing import LIST @@ -128,7 +143,7 @@ def populate_all_templates(): # Check that they all returned a value if len(outputs) == len(config_types): - pass + UTLOGGER.info("Successfully populated and saved templates!") else: err_msg = "Something went wrong during template population" raise Exception(err_msg) @@ -158,10 +173,12 @@ def return_aws_creds(): # Check if set if not creds_path: + UTLOGGER.error( + "CPAC_AWS_CREDS environment variable not set!\n" + "Set this to the filepath location of your AWS credentials." + ) creds_path = input("Enter path to AWS credentials file: ") - return None - else: - return creds_path + return creds_path # Get the default test bucket name @@ -265,6 +282,7 @@ def download_cpac_resources_from_s3(local_base): ) # Print done + UTLOGGER.info("CPAC resources folder in %s is complete!", local_base) # Look for CPAC_RESOURCE_DIR to be in environment @@ -291,6 +309,11 @@ def return_resource_dir(): # Check if set if not resource_dir: # Print notification of cpac resources directory + UTLOGGER.error( + "CPAC_RESOURCE_DIR environment variable not set! Enter directory of the" + " cpac_resources folder.\n\n*If the folder does not exist, it will be" + " downloaded under the directory specified." 
+ ) # Get user input resource_dir = input("Enter C-PAC resources directory: ") @@ -464,6 +487,7 @@ def return_test_subj(): # Check if set and exists if not test_subj: + UTLOGGER.error("CPAC_TEST_SUBJ environment variable not set!") # Get user input test_subj = input("Enter C-PAC benchmark test subject id: ") @@ -541,10 +565,8 @@ def smooth_nii_file(self, nii_file, fwhm, mask_file=None): return smooth_arr -# Download test resource from S3 bucket def download_resource_from_s3(s3_url_path): - """ """ - + """Download test resource from S3 bucket.""" # Import packages import os import tempfile diff --git a/CPAC/utils/test_mocks.py b/CPAC/utils/test_mocks.py index 901b84659c..f0572a840f 100644 --- a/CPAC/utils/test_mocks.py +++ b/CPAC/utils/test_mocks.py @@ -12,7 +12,7 @@ def file_node(path, file_node_num=0): input_node = pe.Node( util.IdentityInterface(fields=["file"]), - name="file_node_{0}".format(file_node_num), + name=f"file_node_{file_node_num}", ) input_node.inputs.file = path return input_node, "file" @@ -155,7 +155,7 @@ def configuration_strategy_mock(method="FSL"): "_selector_CSF-2mmE-M_aC-WM-2mmE-DPC5_G-M_M-SDB_P-2/" "_spatial_map_PNAS_Smith09_rsn10_spatial_map_file_" "..cpac_templates..PNAS_Smith09_rsn10.nii.gz/" - "split_raw_volumes/temp_reg_map_000{0}.nii.gz".format(n), + f"split_raw_volumes/temp_reg_map_000{n}.nii.gz", ) for n in range(10) ], diff --git a/CPAC/utils/test_resources.py b/CPAC/utils/test_resources.py index 88ff2b2354..da58e4e0f9 100644 --- a/CPAC/utils/test_resources.py +++ b/CPAC/utils/test_resources.py @@ -1,3 +1,22 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +from CPAC.utils.monitoring import WFLOGGER + + def setup_test_wf(s3_prefix, paths_list, test_name, workdirs_to_keep=None): """Set up a basic template Nipype workflow for testing single nodes or small sub-workflows. @@ -23,6 +42,7 @@ def setup_test_wf(s3_prefix, paths_list, test_name, workdirs_to_keep=None): for dirname in os.listdir(work_dir): if workdirs_to_keep: for keepdir in workdirs_to_keep: + WFLOGGER.info("%s --- %s\n", dirname, keepdir) if keepdir in dirname: continue try: @@ -43,7 +63,7 @@ def setup_test_wf(s3_prefix, paths_list, test_name, workdirs_to_keep=None): "crashdump_dir": os.path.abspath(test_dir), } - ds = pe.Node(DataSink(), name="sinker_{0}".format(test_name)) + ds = pe.Node(DataSink(), name=f"sinker_{test_name}") ds.inputs.base_directory = out_dir ds.inputs.parameterization = True diff --git a/CPAC/utils/tests/test_bids_utils.py b/CPAC/utils/tests/test_bids_utils.py index 922d1bac8a..75b1698746 100644 --- a/CPAC/utils/tests/test_bids_utils.py +++ b/CPAC/utils/tests/test_bids_utils.py @@ -1,4 +1,21 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. 
+ +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . """Tests for bids_utils.""" +from logging import basicConfig, INFO import os import pytest @@ -12,10 +29,14 @@ load_cpac_data_config, sub_list_filter_by_labels, ) +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC.utils.tests") +basicConfig(format="%(message)s", level=INFO) def create_sample_bids_structure(root_dir): - """Function to create temporary synthetic BIDS data for testing parsing.""" + """Create temporary synthetic BIDS data for testing parsing.""" def _prefix_entities(paths, path): return f'sub-{paths[path]["sub"]}_ses-{paths[path]["ses"]}' @@ -53,9 +74,7 @@ def _prefix_entities(paths, path): @pytest.mark.parametrize("only_one_anat", [True, False]) def test_create_cpac_data_config_only_one_anat(tmp_path, only_one_anat): - """Function to test 'only_one_anat' parameter of - 'create_cpac_data_config' function. - """ + """Test 'only_one_anat' parameter of 'create_cpac_data_config' function.""" create_sample_bids_structure(tmp_path) assert isinstance( create_cpac_data_config(str(tmp_path), only_one_anat=only_one_anat)[0]["anat"][ @@ -68,6 +87,7 @@ def test_create_cpac_data_config_only_one_anat(tmp_path, only_one_anat): @pytest.mark.skip(reason="needs local files not included in package") def test_gen_bids_sublist(bids_dir, test_yml, creds_path, dbg=False): (img_files, config) = collect_bids_files_configs(bids_dir, creds_path) + logger.info("Found %d config files for %d image files", len(config), len(img_files)) sublist = bids_gen_cpac_sublist(bids_dir, img_files, config, creds_path, dbg) with open(test_yml, "w") as ofd: @@ -102,6 +122,7 @@ def test_sub_list_filter_by_labels(t1w_label, bold_label, participant_label): sub_list = sub_list_filter_by_labels( sub_list, {"T1w": t1w_label, "bold": bold_labels} ) + logger.info(sub_list) if t1w_label is not None: if participant_label == "NDARAA504CRN": anat_sub_list = [sub.get("anat") for sub in sub_list] diff --git a/CPAC/utils/tests/test_symlinks.py b/CPAC/utils/tests/test_symlinks.py index 72def9a2c8..570d2e9b74 100644 --- a/CPAC/utils/tests/test_symlinks.py +++ b/CPAC/utils/tests/test_symlinks.py @@ -1,10 +1,31 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+from logging import basicConfig, INFO import os import tempfile import pkg_resources as p +from CPAC.utils.monitoring.custom_logging import getLogger from CPAC.utils.symlinks import create_symlinks +logger = getLogger("CPAC.utils.tests") +basicConfig(format="%(message)s", level=INFO) + mocked_outputs = p.resource_filename( "CPAC", os.path.join("utils", "tests", "test_symlinks-outputs.txt") ) @@ -15,7 +36,8 @@ def test_symlinks(): paths = [] with open(mocked_outputs, "r") as f: - for path in f.readlines(): + for _path in f.readlines(): + path = _path path = path.strip() if path: paths += [path] @@ -24,6 +46,8 @@ def test_symlinks(): temp_dir, "sym_links", "pipeline_benchmark-FNIRT", "1019436_1", paths ) + logger.info("Links created at %s", temp_dir) + # TODO test the generated links # Normal resource case diff --git a/CPAC/utils/tests/test_yaml.py b/CPAC/utils/tests/test_yaml.py index 60e11441c4..8cacdaacaa 100644 --- a/CPAC/utils/tests/test_yaml.py +++ b/CPAC/utils/tests/test_yaml.py @@ -23,7 +23,7 @@ import pytest import yaml -from CPAC.utils.configuration import Configuration, Preconfiguration, preconfig_yaml +from CPAC.utils.configuration import Configuration, preconfig_yaml, Preconfiguration from CPAC.utils.configuration.yaml_template import create_yaml_from_template from .configs import NEUROSTARS_23786, NEUROSTARS_24035 diff --git a/CPAC/utils/typing.py b/CPAC/utils/typing.py index 171ed22d04..fa1057bbcb 100644 --- a/CPAC/utils/typing.py +++ b/CPAC/utils/typing.py @@ -21,13 +21,14 @@ run Python ≥ 3.10, these global variables can be replaced with the current preferred syntax. """ +from pathlib import Path import sys from typing import Union from CPAC.utils.docs import DOCS_URL_PREFIX # Set the version-specific documentation URL in the module docstring: -__doc__ = __doc__.replace(r"{DOCS_URL_PREFIX}", DOCS_URL_PREFIX) +__doc__ = __doc__.replace(r"{DOCS_URL_PREFIX}", DOCS_URL_PREFIX) # noqa: A001 if sys.version_info >= (3, 8): from typing import Literal @@ -40,10 +41,12 @@ if sys.version_info >= (3, 9): from collections.abc import Iterable + DICT = dict LIST = list else: - from typing import Iterable, List + from typing import Dict, Iterable, List + DICT = Dict LIST = List if sys.version_info >= (3, 10): LIST_OR_STR = LIST[str] | str # pylint: disable=invalid-name @@ -54,5 +57,15 @@ LIST_OR_STR = Union[LIST[str], str] # pylint: disable=invalid-name TUPLE = Tuple ITERABLE = Iterable +PATHSTR = Union[Path, str] ConfigKeyType = Union[str, LIST[str]] -__all__ = ["ConfigKeyType", "ITERABLE", "LIST", "LIST_OR_STR", "LITERAL", "TUPLE"] +__all__ = [ + "ConfigKeyType", + "DICT", + "ITERABLE", + "LIST", + "LIST_OR_STR", + "LITERAL", + "PATHSTR", + "TUPLE", +] diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 911c51e55c..a71dafd6c6 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2023 C-PAC Developers +# Copyright (C) 2012-2024 C-PAC Developers # This file is part of C-PAC. @@ -14,20 +14,26 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
+"""General-purpose utilities for C-PAC.""" import collections.abc from copy import deepcopy import fnmatch import gzip from itertools import repeat import json -import numbers import os import pickle +from typing import Any, Union import numpy as np from voluptuous.error import Invalid import yaml +from CPAC.utils.configuration import Configuration +from CPAC.utils.docs import deprecated +from CPAC.utils.monitoring import FMLOGGER, WFLOGGER +from CPAC.utils.typing import LIST, TUPLE + CONFIGS_DIR = os.path.abspath( os.path.join(__file__, *repeat(os.path.pardir, 2), "resources/configs/") ) @@ -39,6 +45,18 @@ os.path.join(CONFIGS_DIR, "1.7-1.8-deprecations.yml"), "r", encoding="utf-8" ) as _f: NESTED_CONFIG_DEPRECATIONS = yaml.safe_load(_f) +VALID_PATTERNS = [ + "alt+z", + "altplus", + "alt+z2", + "alt-z", + "altminus", + "alt-z2", + "seq+z", + "seqplus", + "seq-z", + "seqminus", +] YAML_BOOLS = { True: ("on", "t", "true", "y", "yes"), False: ("f", "false", "n", "no", "off"), @@ -46,82 +64,62 @@ def get_last_prov_entry(prov): + """Get the last provenance entry.""" while not isinstance(prov[-1], str): prov = prov[-1] return prov[-1] def check_prov_for_regtool(prov): + """Check provenance for registration tool.""" last_entry = get_last_prov_entry(prov) last_node = last_entry.split(":")[1] if "ants" in last_node.lower(): return "ants" - elif "fsl" in last_node.lower(): + if "fsl" in last_node.lower(): return "fsl" - else: - # go further back in case we're checking against a concatenated - # downstream xfm like bold-to-template (and prov is the provenance of - # that downstream xfm) - if "from-T1w_to-template_mode-image_xfm:" in str(prov): - splitprov = str(prov).split("from-T1w_to-template_mode-image_xfm:") - node_name = splitprov[1].split("']")[0] - if "ANTs" in node_name: - return "ants" - elif "FSL" in node_name: - return "fsl" - return None - elif "from-bold_to-template_mode-image_xfm:" in str(prov): - splitprov = str(prov).split("from-bold_to-template_mode-image_xfm:") - node_name = splitprov[1].split("']")[0] - if "ANTs" in node_name: - return "ants" - elif "FSL" in node_name: - return "fsl" - else: - return None - elif "from-T1w_to-symtemplate_mode-image_xfm:" in str(prov): - splitprov = str(prov).split("from-T1w_to-symtemplate_mode-image_xfm:") - node_name = splitprov[1].split("']")[0] - if "ANTs" in node_name: - return "ants" - elif "FSL" in node_name: - return "fsl" - return None - elif "from-bold_to-symtemplate_mode-image_xfm:" in str(prov): - splitprov = str(prov).split("from-bold_to-symtemplate_mode-image_xfm:") + # go further back in case we're checking against a concatenated + # downstream xfm like bold-to-template (and prov is the provenance of + # that downstream xfm) + for key in [ + "from-T1w_to-template_mode-image_xfm:", + "from-bold_to-template_mode-image_xfm:", + "from-T1w_to-symtemplate_mode-image_xfm:", + "from-bold_to-symtemplate_mode-image_xfm:", + ]: + if key in str(prov): + splitprov = str(prov).split(key) node_name = splitprov[1].split("']")[0] if "ANTs" in node_name: return "ants" - elif "FSL" in node_name: + if "FSL" in node_name: return "fsl" - else: - return None - else: return None + return None def check_prov_for_motion_tool(prov): + """Check provenance for motion correction tool.""" last_entry = get_last_prov_entry(prov) last_node = last_entry.split(":")[1] if "3dvolreg" in last_node.lower(): return "3dvolreg" - elif "mcflirt" in last_node.lower(): + if "mcflirt" in last_node.lower(): return "mcflirt" - else: - # check entire prov - if "3dvolreg" in 
str(prov): - return "3dvolreg" - elif "mcflirt" in str(prov): - return "mcflirt" - else: - return None + # check entire prov + if "3dvolreg" in str(prov): + return "3dvolreg" + if "mcflirt" in str(prov): + return "mcflirt" + return None -def get_flag(in_flag): +def _get_flag(in_flag): return in_flag def get_flag_wf(wf_name="get_flag"): + """Create a workflow to get a flag.""" import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe @@ -131,18 +129,20 @@ def get_flag_wf(wf_name="get_flag"): input_node = pe.Node(util.IdentityInterface(fields=["in_flag"]), name="inputspec") get_flag = pe.Node( - util.Function(input_names=["in_flag"], function=get_flag), name="get_flag" + util.Function(input_names=["in_flag"], function=_get_flag), name="get_flag" ) wf.connect(input_node, "in_flag", get_flag, "in_flag") def read_json(json_file): + """Read a JSON file and return the contents as a dictionary.""" try: with open(json_file, "r") as f: json_dct = json.load(f) except json.decoder.JSONDecodeError as err: - raise Exception(f"\n\n{err}\n\nJSON file: {json_file}\n") + msg = f"\n\n{err}\n\nJSON file: {json_file}\n" + raise Exception(msg) return json_dct @@ -156,8 +156,7 @@ def create_id_string( fwhm=None, subdir=None, ): - """Create the unique key-value identifier string for BIDS-Derivatives - compliant file names. + """Create the unique key-value identifier string for BIDS-Derivatives file names. This is used in the file renaming performed during the Datasink connections. @@ -207,7 +206,8 @@ def create_id_string( out_filename = out_filename.replace(tag, newtag) break else: - raise Exception("\n[!] FWHM provided but no desc-sm?\n") + msg = "\n[!] FWHM provided but no desc-sm?\n" + raise Exception(msg) # drop space- entities from from native-space filenames if subdir == "anat": @@ -218,6 +218,7 @@ def create_id_string( def write_output_json(json_data, filename, indent=3, basedir=None): + """Write a dictionary to a JSON file.""" if not basedir: basedir = os.getcwd() if ".json" not in filename: @@ -369,7 +370,7 @@ def get_zscore(map_node=False, wf_name="z_score"): def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): - """Runs the compute_fisher_z_score function as part of a one-node workflow.""" + """Run the compute_fisher_z_score function as part of a one-node workflow.""" import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe @@ -416,11 +417,10 @@ def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): def compute_fisher_z_score(correlation_file, timeseries_one_d, input_name): - """ - Computes the fisher z transform of the input correlation map + """Compute the fisher z transform of the input correlation map. + If the correlation map contains data for multiple ROIs then - the function returns z score for each ROI as a seperate nifti - file. + return z score for each ROI as a seperate NIfTI file. Parameters @@ -471,9 +471,22 @@ def compute_fisher_z_score(correlation_file, timeseries_one_d, input_name): return out_file -def get_operand_string(mean, std_dev): +def fetch_and_convert( + scan_parameters: dict, scan: str, keys: LIST[str], convert_to: type, fallback: Any +) -> Any: + """Fetch a parameter from a scan parameters dictionary and convert it to a given type. + + Catch TypeError exceptions and return a fallback value in those cases. """ - Method to get operand string for Fsl Maths. 
+ try: + value = convert_to(scan_parameters, None, scan, keys) + except TypeError: + value = fallback + return value + + +def get_operand_string(mean, std_dev): + """Get operand string for fslmaths. Parameters ---------- @@ -491,50 +504,10 @@ def get_operand_string(mean, std_dev): return str1 + " -mas %s" -def get_roi_num_list(timeseries_file, prefix=None): - # extracts the ROI labels from the 3dROIstats output CSV file - with open(timeseries_file, "r") as f: - roi_file_lines = f.read().splitlines() - - roi_err = ( - "\n\n[!] The output of 3dROIstats, used in extracting the " - "timeseries, is either empty, or not in the expected " - "format.\n\nROI output file: {0}\n\nIf there are no rows " - "in the output file, double-check your ROI/mask selection." - "\n\n".format(str(timeseries_file)) - ) - - for line in roi_file_lines: - if "Mean_" in line: - try: - roi_list = line.split(",") - # clear out any blank strings/non ROI labels in the list - roi_list = [x for x in roi_list if "Mean" in x] - # rename labels - roi_list = [ - x.replace("Mean", "ROI").replace(" ", "").replace("#", "") - for x in roi_list - ] - except: - raise Exception(roi_err) - break - else: - raise Exception(roi_err) - - if prefix: - temp_rois = [] - for roi in roi_list: - roi = prefix + "_" + str(roi) - temp_rois.append(roi) - roi_list = temp_rois - - return roi_list - - def safe_shape(*vol_data): - """ - Checks if the volume (first three dimensions) of multiple ndarrays - are the same shape. + """Check if the volume of multiple ndarrays are the same shape. + + The volume is encoded in the first three dimensions of the ndarray. Parameters ---------- @@ -555,46 +528,8 @@ def safe_shape(*vol_data): return same_volume -def extract_one_d(list_timeseries): - if isinstance(list_timeseries, str): - if ".1D" in list_timeseries or ".csv" in list_timeseries: - return list_timeseries - - for timeseries in list_timeseries: - if ".1D" in timeseries or ".csv" in timeseries: - return timeseries - - raise Exception( - "Unable to retrieve roi timeseries 1D or csv" - " file. Files found:" + list_timeseries - ) - - -def extract_txt(list_timeseries): - """ - Method to extract txt file containing - roi timeseries required for dual regression. - """ - if isinstance(list_timeseries, str): - if list_timeseries.endswith(".txt"): - return list_timeseries - - out_file = None - for timeseries in list_timeseries: - if timeseries.endswith(".txt"): - out_file = timeseries - - if not out_file: - raise Exception( - "Unable to retrieve roi timeseries txt" - " file required for dual regression." 
- " Existing files are:%s" % (list_timeseries) - ) - - return out_file - - def zscore(data, axis): + """Calculate the z-score of a dataset along a given axis.""" data = data.copy() data -= data.mean(axis=axis, keepdims=True) data /= data.std(axis=axis, keepdims=True) @@ -603,12 +538,13 @@ def zscore(data, axis): def correlation(matrix1, matrix2, match_rows=False, z_scored=False, symmetric=False): + """Calcluate the correlation between two matrices.""" d1 = matrix1.shape[-1] d2 = matrix2.shape[-1] assert d1 == d2 - assert matrix1.ndim <= 2 - assert matrix2.ndim <= 2 + assert matrix1.ndim <= 2 # noqa: PLR2004 + assert matrix2.ndim <= 2 # noqa: PLR2004 if match_rows: assert matrix1.shape == matrix2.shape @@ -635,13 +571,11 @@ def correlation(matrix1, matrix2, match_rows=False, z_scored=False, symmetric=Fa def check(params_dct, subject_id, scan_id, val_to_check, throw_exception): + """Check that a value is populated for a given key in a parameters dictionary.""" if val_to_check not in params_dct: if throw_exception: - raise Exception( - "Missing Value for {0} for participant " "{1}".format( - val_to_check, subject_id - ) - ) + msg = f"Missing Value for {val_to_check} for participant " f"{subject_id}" + raise ValueError(msg) return None if isinstance(params_dct[val_to_check], dict): @@ -651,53 +585,26 @@ def check(params_dct, subject_id, scan_id, val_to_check, throw_exception): if ret_val == "None": if throw_exception: - raise Exception( - "'None' Parameter Value for {0} for participant " "{1}".format( - val_to_check, subject_id - ) + msg = ( + f"'None' Parameter Value for {val_to_check} for" + f" participant {subject_id}" ) - else: - ret_val = None + raise ValueError(msg) + ret_val = None if ret_val == "" and throw_exception: - raise Exception( - "Missing Value for {0} for participant " "{1}".format( - val_to_check, subject_id - ) - ) + msg = f"Missing Value for {val_to_check} for participant {subject_id}" + raise ValueError(msg) return ret_val -def check_random_state(seed): - """ - Turn seed into a np.random.RandomState instance - Code from scikit-learn (https://github.com/scikit-learn/scikit-learn). - - Parameters - ---------- - seed : None | int | instance of RandomState - If seed is None, return the RandomState singleton used by np.random. - If seed is an int, return a new RandomState instance seeded with seed. - If seed is already a RandomState instance, return it. - Otherwise raise ValueError. - """ - if seed is None or seed is np.random: - return np.random.mtrand._rand - if isinstance(seed, (numbers.Integral, np.integer)): - return np.random.RandomState(seed) - if isinstance(seed, np.random.RandomState): - return seed - raise ValueError( - "%r cannot be used to seed a numpy.random.RandomState" " instance" % seed - ) - - def try_fetch_parameter(scan_parameters, subject, scan, keys): + """Try to fetch a parameter from a scan parameters dictionary.""" scan_parameters = {k.lower(): v for k, v in scan_parameters.items()} - for key in keys: - key = key.lower() + for _key in keys: + key = _key.lower() if key not in scan_parameters: continue @@ -723,9 +630,7 @@ def get_scan_params( pipeconfig_stop_indx, data_config_scan_params=None, ): - """ - Method to extract slice timing correction parameters - and scan parameters. + """Extract slice timing correction parameters and scan parameters. 
Parameters ---------- @@ -763,15 +668,9 @@ def check2(val): return val if val is None or val == "" or isinstance(val, str) else int(val) # initialize vars to empty - TR = "" - pattern = "" - ref_slice = "" - first_tr = "" - last_tr = "" + TR = pattern = ref_slice = first_tr = last_tr = pe_direction = "" unit = "s" - pe_direction = "" - effective_echo_spacing = None - template = None + effective_echo_spacing = template = None if isinstance(pipeconfig_stop_indx, str): if "End" in pipeconfig_stop_indx or "end" in pipeconfig_stop_indx: @@ -780,11 +679,10 @@ def check2(val): if ".json" in data_config_scan_params: if not os.path.exists(data_config_scan_params): err = ( - "\n[!] WARNING: Scan parameters JSON file listed in " - "your data configuration file does not exist:\n{0}" - "\n\n".format(data_config_scan_params) + "\n[!] WARNING: Scan parameters JSON file listed in your data" + f" configuration file does not exist:\n{data_config_scan_params}" ) - raise Exception(err) + raise FileNotFoundError(err) with open(data_config_scan_params, "r") as f: params_dct = json.load(f) @@ -821,23 +719,12 @@ def check2(val): # TODO: better handling of errant key values!!! # TODO: use schema validator to deal with it # get details from the configuration - try: - TR = float( - try_fetch_parameter( - params_dct, subject_id, scan, ["TR", "RepetitionTime"] - ) - ) - except TypeError: - TR = None - - try: - template = str( - try_fetch_parameter( - params_dct, subject_id, scan, ["Template", "template"] - ) - ) - except TypeError: - template = None + TR = fetch_and_convert( + params_dct, scan, ["TR", "RepetitionTime"], float, None + ) + template = fetch_and_convert( + params_dct, scan, ["Template", "template"], str, None + ) pattern = str( try_fetch_parameter( @@ -849,28 +736,24 @@ def check2(val): ) ref_slice = check(params_dct, subject_id, scan, "reference", False) - if ref_slice: - ref_slice = int(ref_slice) + ref_slice = int(ref_slice) if ref_slice else ref_slice first_tr = check(params_dct, subject_id, scan, "first_TR", False) - if first_tr: - first_tr = check2(first_tr) + first_tr = check2(first_tr) if first_tr else first_tr last_tr = check(params_dct, subject_id, scan, "last_TR", False) - if last_tr: - last_tr = check2(last_tr) + last_tr = check2(last_tr) if last_tr else last_tr pe_direction = check( params_dct, subject_id, scan, "PhaseEncodingDirection", False ) - try: - effective_echo_spacing = float( - try_fetch_parameter( - params_dct, subject_id, scan, ["EffectiveEchoSpacing"] - ) - ) - except TypeError: - pass + effective_echo_spacing = fetch_and_convert( + params_dct, + scan, + ["EffectiveEchoSpacing"], + float, + effective_echo_spacing, + ) else: err = ( @@ -878,17 +761,10 @@ def check2(val): "information included in the data configuration file for " f"the participant {subject_id}.\n\n" ) - raise Exception(err) - if first_tr == "" or first_tr is None: - first_tr = pipeconfig_start_indx - - if last_tr == "" or last_tr is None: - last_tr = pipeconfig_stop_indx - - unit = "s" - - if "None" in pattern or "none" in pattern: - pattern = None + raise OSError(err) + first_tr = pipeconfig_start_indx if first_tr == "" or first_tr is None else first_tr + last_tr = pipeconfig_stop_indx if last_tr == "" or last_tr is None else last_tr + pattern = None if "None" in pattern or "none" in pattern else pattern """ if not pattern: @@ -903,19 +779,7 @@ def check2(val): # indicates that the images header information should be used tpattern_file = None - valid_patterns = [ - "alt+z", - "altplus", - "alt+z2", - "alt-z", - 
"altminus", - "alt-z2", - "seq+z", - "seqplus", - "seq-z", - "seqminus", - ] - if pattern and pattern != "" and pattern not in valid_patterns: + if pattern and pattern != "" and pattern not in VALID_PATTERNS: if isinstance(pattern, list) or ( "[" in pattern and "]" in pattern and "," in pattern ): @@ -932,33 +796,34 @@ def check2(val): try: with open(tpattern_file, "wt") as f: for time in slice_timings: - f.write("{0}\n".format(time).replace(" ", "")) - except: + f.write(f"{time}\n".replace(" ", "")) + except (OSError, TypeError) as e: err = ( "\n[!] Could not write the slice timing file meant as " "an input for AFNI 3dTshift (slice timing correction):" - "\n{0}\n\n".format(tpattern_file) + f"\n{tpattern_file}\n\n" ) - raise Exception(err) + raise OSError(err) from e elif ".txt" in pattern and not os.path.exists(pattern): # if the user provided an acquisition pattern text file for # 3dTshift - raise Exception( - "Invalid Pattern file path {0}, Please provide " - "the correct path".format(pattern) + msg = ( + f"Invalid Pattern file path {pattern}, Please provide " + "the correct path" ) + raise Exception(msg) elif ".txt" in pattern: with open(pattern, "r") as f: lines = f.readlines() - if len(lines) < 2: - raise Exception( - "Invalid slice timing file format. The file " - "should contain only one value per row. Use " - "new line char as delimiter" + if len(lines) < 2: # noqa: PLR2004 + msg = ( + "Invalid slice timing file format. The file should contain only one" + " value per row. Use new line char as delimiter" ) + raise Exception(msg) tpattern_file = pattern - slice_timings = [float(l.rstrip("\r\n")) for l in lines] + slice_timings = [float(l.rstrip("\r\n")) for l in lines] # noqa: E741 else: # this only happens if there is a non-path string set in the data # config dictionary for acquisition pattern (like "alt+z"), except @@ -966,7 +831,7 @@ def check2(val): err = ( "\n[!] The slice timing acquisition pattern provided is " "not supported by AFNI 3dTshift:\n" - "{0}\n".format(str(pattern)) + f"{pattern!s}\n" ) raise Exception(err) @@ -983,21 +848,18 @@ def check2(val): "milliseconds. 
Converting TR into milliseconds" ) TR = TR * 1000 + WFLOGGER.info("New TR value %s ms", TR) unit = "ms" - else: + elif TR and TR > 10: # noqa: PLR2004 # check to see, if TR is in milliseconds, convert it into seconds - if TR and TR > 10: - warnings.warn("TR is in milliseconds, Converting it into seconds") - TR = TR / 1000.0 - unit = "s" + warnings.warn("TR is in milliseconds, Converting it into seconds") + TR = TR / 1000.0 + WFLOGGER.info("New TR value %s s", TR) + unit = "s" # swap back in - if TR: - tr = "{0}{1}".format(str(TR), unit) - else: - tr = "" - + tr = f"{TR!s}{unit}" if TR else "" tpattern = pattern start_indx = first_tr stop_indx = last_tr @@ -1014,71 +876,21 @@ def check2(val): ) -def get_tr(tr): - """Method to return TR in seconds.""" - import re - - if tr: - tr = re.search(r"\d+.\d+", str(tr)).group(0) - tr = float(tr) - if tr > 10: - tr = tr / 1000.0 - else: - tr = "" - return tr - - -def check_tr(tr, in_file): - # imageData would have to be the image data from the funcFlow workflow, - # funcFlow outputspec.subject - import nibabel as nib - - img = nib.load(in_file) - - # get header from image data, then extract TR information, TR is fourth - # item in list returned by get_zooms() - imageHeader = img.header - imageZooms = imageHeader.get_zooms() - header_tr = imageZooms[3] - - # If the TR information from header_tr (funcFlow) and convert_tr node - # (TR from config file) do not match, prepare to update the TR information - # from either convert_tr or header_tr using afni 3drefit, then append to - # func_to_mni - if header_tr != tr: - if tr is not None and tr != "": - TR = tr - else: - TR = header_tr - - import warnings - - warnings.warn( - "Warning: The TR information does not match between " - "the config and subject list files." - ) - - return TR - - def add_afni_prefix(tpattern): + """Add '@' prefix to tpattern.txt filename.""" if ".txt" in tpattern: - tpattern = "@{0}".format(tpattern) + tpattern = f"@{tpattern}" return tpattern def write_to_log(workflow, log_dir, index, inputs, scan_id): - """Method to write into log file the status of the workflow run.""" + """Write into log file the status of the workflow run.""" import datetime import os import time - from nipype import logging - from CPAC import __version__ - iflogger = logging.getLogger("nipype.interface") - version = __version__ subject_id = os.path.basename(log_dir) @@ -1106,68 +918,75 @@ def write_to_log(workflow, log_dir, index, inputs, scan_id): try: os.makedirs(file_path) except Exception: - iflogger.info( - "filepath already exist, filepath- {0}, " "curr_dir - {1}".format( - file_path, os.getcwd() - ) + FMLOGGER.error( + "filepath already exist, filepath- %s, curr_dir - %s", + file_path, + os.getcwd(), ) else: file_path = os.path.join(log_dir, scan_id) - except Exception: - raise + except Exception as e: + msg = "ERROR in write log" + raise OSError(msg) from e try: os.makedirs(file_path) except Exception: - iflogger.info( - "filepath already exist, " "filepath: {0}, " "curr_dir: {1}".format( - file_path, os.getcwd() - ) + FMLOGGER.error( + "filepath already exist, filepath: %s, curr_dir: %s", file_path, os.getcwd() ) - out_file = os.path.join(file_path, "log_{0}.yml".format(strategy)) + out_file = os.path.join(file_path, f"log_{strategy}.yml") - iflogger.info("CPAC custom log:") + WFLOGGER.info("CPAC custom log:") if isinstance(inputs, list): inputs = inputs[0] if os.path.exists(inputs): status_msg = "wf_status: DONE" - iflogger.info( - "version: {0}, " - "timestamp: {1}, " - "subject_id: {2}, " - 
"scan_id: {3}, " - "strategy: {4}, " - "workflow: {5}, " - "status: COMPLETED".format( - str(version), str(stamp), subject_id, scan_id, strategy, workflow - ) + WFLOGGER.info( + "version: %s, " + "timestamp: %s, " + "subject_id: %s, " + "scan_id: %s, " + "strategy: %s, " + "workflow: %s, " + "status: COMPLETED", + version, + stamp, + subject_id, + scan_id, + strategy, + workflow, ) else: status_msg = "wf_status: ERROR" - iflogger.info( - "version: {0}, " - "timestamp: {1}, " - "subject_id: {2}, " - "scan_id: {3}, " - "strategy: {4}, " - "workflow: {5}, " - "status: ERROR".format( - str(version), str(stamp), subject_id, scan_id, strategy, workflow - ) + WFLOGGER.error( + "version: %s, " + "timestamp: %s, " + "subject_id: %s, " + "scan_id: %s, " + "strategy: %s, " + "workflow: %s, " + "status: ERROR", + version, + stamp, + subject_id, + scan_id, + strategy, + workflow, ) with open(out_file, "w") as f: - f.write("version: {0}\n".format(str(version))) - f.write("timestamp: {0}\n".format(str(stamp))) - f.write("pipeline_index: {0}\n".format(index)) - f.write("subject_id: {0}\n".format(subject_id)) - f.write("scan_id: {0}\n".format(scan_id)) - f.write("strategy: {0}\n".format(strategy)) - f.write("workflow_name: {0}\n".format(workflow)) + f.write(f"version: {version!s}\n") + f.write(f"timestamp: {stamp!s}\n") + f.write(f"pipeline_index: {index}\n") + f.write(f"subject_id: {subject_id}\n") + f.write(f"scan_id: {scan_id}\n") + f.write(f"strategy: {strategy}\n") + f.write(f"workflow_name: {workflow}\n") f.write(status_msg) return out_file @@ -1222,12 +1041,8 @@ def create_log(wf_name="log", scan_id=None): return wf -def pick_wm(seg_prob_list): - seg_prob_list.sort() - return seg_prob_list[-1] - - def find_files(directory, pattern): + """Find files in directory.""" for root, dirs, files in os.walk(directory): for basename in files: if fnmatch.fnmatch(basename, pattern): @@ -1236,7 +1051,8 @@ def find_files(directory, pattern): def extract_output_mean(in_file, output_name): - """ + """Copy from a 1D file to a text file. + function takes 'in_file', which should be an intermediary 1D file from individual-level analysis, containing the mean of the output across all voxels. 
@@ -1266,7 +1082,7 @@ def extract_output_mean(in_file, output_name): if ".1D" in filename: filename = filename.replace(".1D", "") - resource_name = "{0}_{1}_{2}".format(output_name, maskname, filename) + resource_name = f"{output_name}_{maskname}_{filename}" elif "_spatial_map_" in in_file and "dr_tempreg" in in_file: for dirname in split_fullpath: @@ -1278,7 +1094,7 @@ def extract_output_mean(in_file, output_name): if ".1D" in filename: filename = filename.replace(".1D", "") - resource_name = "{0}_{1}_{2}".format(output_name, mapname, filename) + resource_name = f"{output_name}_{mapname}_{filename}" elif "_mask_" in in_file and "centrality" in in_file: for dirname in split_fullpath: @@ -1290,14 +1106,12 @@ def extract_output_mean(in_file, output_name): if ".1D" in filename: filename = filename.replace(".1D", "") - resource_name = "{0}_{1}_{2}".format(output_name, maskname, filename) + resource_name = f"{output_name}_{maskname}_{filename}" else: resource_name = output_name - output_means_file = os.path.join( - os.getcwd(), "mean_{0}.txt".format(resource_name) - ) + output_means_file = os.path.join(os.getcwd(), f"mean_{resource_name}.txt") with open(output_means_file, "w") as f: f.write(line) @@ -1305,57 +1119,11 @@ def extract_output_mean(in_file, output_name): return output_means_file -def create_output_mean_csv(subject_dir): - """ - this function finds all of the mean_{output}.txt files in the subject's - output directory, collects the data and organizes them into one .csv - file in the subject directory. - """ - import os - - output_vals = {} - - subID = subject_dir.split("/")[len(subject_dir.split("/")) - 1] - means_dir = os.path.join(subject_dir, "output_means") - - # extract the mean values - for root, _, files in os.walk(means_dir): - for filename in files: - if "mean_" in filename: - output = filename.replace("mean_", "") - output = output.replace(".txt", "") - - filepath = os.path.join(root, filename) - - if os.path.exists(filepath): - try: - mean_file = open(filepath, "rU") - val = mean_file.readline() - val = val.strip("\n") - except: - raise Exception - - else: - raise Exception - - output_vals[output] = val - - # now take the extracted mean values and write them into the .csv file! - csv_file_path = os.path.join(subject_dir, "output_means_%s.csv" % subID) - with open(csv_file_path, "wt") as csv_file: - output_items = list(output_vals.items()) - - deriv_string = ",".join(v for v, _ in output_items) - val_string = ",".join(v for _, v in output_items) - - csv_file.write(deriv_string + "\n") - csv_file.write(val_string + "\n") - - def check_command_path(path): + """Chek if command path exists.""" import os - return os.system("%s >/dev/null 2>&1" % path) != 32512 + return os.system("%s >/dev/null 2>&1" % path) != 32512 # noqa: PLR2004 def check_system_deps( @@ -1364,10 +1132,7 @@ def check_system_deps( check_centrality_degree=False, check_centrality_lfcd=False, ): - """ - Function to check system for neuroimaging tools AFNI, C3D, FSL, - and (optionally) ANTs. 
- """ + """Check system for neuroimaging tools AFNI, C3D, FSL and ANTs.""" missing_install = [] # Check AFNI @@ -1410,8 +1175,10 @@ def check_system_deps( raise Exception(err) -# Check pipeline config againts computer resources -def check_config_resources(c): +def check_config_resources( + c: Union[Configuration, dict], +) -> TUPLE[Union[float, int], int, int, int]: + """Check pipeline config againts computer resources.""" # Import packages from multiprocessing import cpu_count @@ -1518,15 +1285,17 @@ def check_config_resources(c): def _check_nested_types(d, keys): - """Helper function to check types for *_nested_value functions.""" + """Check types for *_nested_value functions.""" if not isinstance(d, dict): - raise TypeError(f"Expected dict, got {type(d).__name__}: {d!s}") + msg = f"Expected dict, got {type(d).__name__}: {d!s}" + raise TypeError(msg) if not isinstance(keys, list) and not isinstance(keys, tuple): - raise TypeError(f"Expected list, got {type(keys).__name__}: {keys!s}") + msg = f"Expected list, got {type(keys).__name__}: {keys!s}" + raise TypeError(msg) def delete_nested_value(d, keys): - """Helper function to delete nested values. + """Delete nested values. Parameters ---------- @@ -1555,18 +1324,13 @@ def delete_nested_value(d, keys): return d -def ordereddict_to_dict(value): - """This function convert ordereddict into regular dict.""" - for k, v in value.items(): - if isinstance(v, dict): - value[k] = ordereddict_to_dict(v) - return dict(value) - - -def repickle(directory): +@deprecated( + "1.8.7", + "Python 2's end of life was over 4 years prior to this release. A user jumping from a C-PAC version that used Python 2 can use this function in any C-PAC version from 1.6.2 up until its removal in an upcoming version.", +) +def repickle(directory): # noqa: T20 """ - Function to check all of the pickles in a given directory, recursively, and - convert any Python 2 pickles found to Python 3 pickles. + Recursively check a directory; convert Python 2 pickles to Python 3 pickles. Parameters ---------- @@ -1586,10 +1350,16 @@ def repickle(directory): f = pickle.load(fp, encoding="latin1") with open(p, "wb") as fp: pickle.dump(f, fp) - except Exception: - pass + print( + f"Converted pickle {fn} from a Python 2 pickle to " + "a Python 3 pickle." + ) + except Exception as e: + print( + f"Could not convert Python 2 pickle {p} " f"because {e}\n" + ) else: - pass + print(f"Pickle {fn} is a Python 3 pickle.") elif fn.endswith(".pklz"): if _pickle2(p, True): try: @@ -1597,16 +1367,27 @@ def repickle(directory): f = pickle.load(fp, encoding="latin1") with gzip.open(p, "wb") as fp: pickle.dump(f, fp) - except Exception: - pass + print( + f"Converted pickle {fn} from a Python 2 pickle to " + "a Python 3 pickle." + ) + except Exception as e: + print( + f"Could not convert Python 2 pickle {p} " f"because {e}\n" + ) else: - pass + print(f"Pickle {fn} is a Python 3 pickle.") -def _pickle2(p, z=False): +@deprecated( + "1.8.7", + "Python 2's end of life was over 4 years prior to this release. A user jumping from a C-PAC version that used Python 2 can use this function in any C-PAC version from 1.6.2 up until its removal in an upcoming version.", +) +def _pickle2(p, z=False): # noqa: T20 """ - Helper function to check if a pickle is a Python 2 pickle. Also prints - other exceptions raised by trying to load the file at p. + Check if a pickle is a Python 2 pickle. + + Also print other exceptions raised by trying to load the file at p. 
Parameters ---------- @@ -1627,21 +1408,26 @@ def _pickle2(p, z=False): pickle.load(fp) except UnicodeDecodeError: return True - except Exception: - pass + except Exception as e: + print( + f"Pickle {p} may be a Python 3 pickle, but raised " f"exception {e}" + ) else: with open(p, "rb") as fp: try: pickle.load(fp) except UnicodeDecodeError: return True - except Exception: - pass + except Exception as e: + print( + f"Pickle {p} may be a Python 3 pickle, but raised " f"exception {e}" + ) return False -def _changes_1_8_0_to_1_8_1(config_dict): - """ +def _changes_1_8_0_to_1_8_1(config_dict: dict) -> dict: + """Automatically update a configuration dictionary from 1.8.0 to 1.8.1. + Examples -------- Starting with 1.8.0 @@ -1768,9 +1554,7 @@ def _changes_1_8_0_to_1_8_1(config_dict): def _combine_labels(config_dict, list_to_combine, new_key): - """ - Helper function to combine formerly separate keys into a - combined key. + """Combine formerly separate keys into a combined key. Parameters ---------- @@ -1805,7 +1589,8 @@ def _combine_labels(config_dict, list_to_combine, new_key): def concat_list(in_list1=None, in_list2=None): - """ + """Concatenate a pair of lists. + Parameters ---------- in_list1 : list or str @@ -1839,7 +1624,7 @@ def list_item_replace( old, new, ): - """Function to replace an item in a list. + """Replace an item in a list. Parameters ---------- @@ -1871,7 +1656,7 @@ def list_item_replace( def lookup_nested_value(d, keys): - """Helper method to look up nested values. + """Look up nested values. Parameters ---------- @@ -1896,18 +1681,15 @@ def lookup_nested_value(d, keys): if value is None: return "" return value - else: - try: - return lookup_nested_value(d[keys[0]], keys[1:]) - except KeyError as e: - e.args = (keys,) - raise + try: + return lookup_nested_value(d[keys[0]], keys[1:]) + except KeyError as e: + e.args = (keys,) + raise def _now_runswitch(config_dict, key_sequence): - """ - Helper function to convert a formerly forkable value to a - runswitch. + """Convert a formerly forkable value to a runswitch. Parameters ---------- @@ -1929,8 +1711,7 @@ def _now_runswitch(config_dict, key_sequence): def _remove_somethings(value, things_to_remove): - """Helper function to remove instances of any in a given set of - values from a list. + """Remove instances of any in a given set of values from a list. Parameters ---------- @@ -1950,7 +1731,7 @@ def _remove_somethings(value, things_to_remove): def remove_False(d, k): - """Function to remove "Off" and False from a list at a given nested key. + """Remove "Off" and False from a list at a given nested key. Parameters ---------- @@ -1973,7 +1754,7 @@ def remove_False(d, k): def remove_None(d, k): - """Function to remove "None" and None from a list at a given nested key. + """Remove "None" and None from a list at a given nested key. Parameters ---------- @@ -1996,7 +1777,7 @@ def remove_None(d, k): def replace_in_strings(d, replacements=None): - """Helper function to recursively replace substrings. + """Recursively replace substrings. Parameters ---------- @@ -2031,7 +1812,7 @@ def replace_in_strings(d, replacements=None): def set_nested_value(d, keys, value): - """Helper method to set nested values. + """Set nested values. Parameters ---------- @@ -2060,7 +1841,7 @@ def set_nested_value(d, keys, value): def update_config_dict(old_dict): - """Function to convert an old config dict to a new config dict. + """Convert an old config dict to a new config dict. 
Parameters ---------- @@ -2090,9 +1871,9 @@ def update_config_dict(old_dict): """ def _append_to_list(current_value, new_value): - """Helper function to add new_value to the current_value list - or create a list if one does not exist. Skips falsy elements - in new_value. + """Add new_value to the current_value list, creating list if it does not exist. + + Skips falsy elements in new_value. Parameters ---------- @@ -2133,8 +1914,7 @@ def _append_to_list(current_value, new_value): return current_value def _bool_to_str(old_value, value_if_true): - """Helper function to convert a True or a list containing a - True to a given string. + """Convert a True or a list containing a True to a given string. Parameters ---------- @@ -2169,8 +1949,7 @@ def _bool_to_str(old_value, value_if_true): return None def _get_old_values(old_dict, new_dict, key): - """Helper function to get old and current values of a special key - being updated. + """Get old and current values of a special key being updated. Parameters ---------- @@ -2480,8 +2259,7 @@ def update_nested_dict(d_base, d_update, fully_specified=False): def update_pipeline_values_1_8(d_old): - """Function to update pipeline config values that changed from - C-PAC 1.7 to 1.8. + """Update pipeline config values that changed from C-PAC 1.7 to 1.8. Parameters ---------- @@ -2559,7 +2337,7 @@ def update_pipeline_values_1_8(d_old): centr_value = lookup_nested_value(d, centr_keys) if any(isinstance(v, bool) for v in centr_value): for i in range(2): - if centr_value[i] is True: + if centr_value[i]: centr_value[i] = valid_options["centrality"]["weight_options"][ i ] @@ -2615,10 +2393,10 @@ def update_pipeline_values_1_8(d_old): def update_values_from_list(d_old, last_exception=None): - """Function to convert 1-length lists of an expected type to - single items of that type, or to convert singletons of an expected - list of a type into lists thereof. Also handles some type - conversions against the schema. + """Convert 1-length lists of an expected type to single items of that type... + + ...or to convert singletons of an expected list of a type into lists thereof. + Also handles some type conversions against the schema. 
Parameters ---------- @@ -2675,53 +2453,45 @@ def update_values_from_list(d_old, last_exception=None): return update_values_from_list( set_nested_value(d, e.path, bool(observed)), e ) - elif isinstance(observed, list): + if isinstance(observed, list): if len(observed) == 0: # pylint: disable=no-else-return return update_values_from_list( set_nested_value(d, e.path, False), e ) - else: - # maintain a list if list expected - list_expected = e.path[-1] == 0 - e_path = e.path[:-1] if list_expected else e.path - if len(observed) == 1: # pylint: disable=no-else-return - if isinstance(observed[0], int): - value = bool(observed[0]) - elif observed[0].lower() in YAML_BOOLS[True]: - value = True - elif observed[0].lower() in YAML_BOOLS[False]: - value = False - return update_values_from_list( - set_nested_value( - d, e_path, [value] if list_expected else value - ), - e, - ) - else: - return update_values_from_list( - set_nested_value( - d, e_path, [bool(value) for value in observed] - ), - e, - ) - elif observed.lower() in YAML_BOOLS[True]: - return update_values_from_list(set_nested_value(d, e.path, True), e) - elif observed.lower() in YAML_BOOLS[False]: - return update_values_from_list(set_nested_value(d, e.path, False), e) - else: + # maintain a list if list expected + list_expected = e.path[-1] == 0 + e_path = e.path[:-1] if list_expected else e.path + if len(observed) == 1: # pylint: disable=no-else-return + if isinstance(observed[0], int): + value = bool(observed[0]) + elif observed[0].lower() in YAML_BOOLS[True]: + value = True + elif observed[0].lower() in YAML_BOOLS[False]: + value = False + return update_values_from_list( + set_nested_value( + d, e_path, [value] if list_expected else value + ), + e, + ) return update_values_from_list( - set_nested_value(d, e_path, observed[0]), e + set_nested_value(d, e_path, [bool(value) for value in observed]), + e, ) + if observed.lower() in YAML_BOOLS[True]: + return update_values_from_list(set_nested_value(d, e.path, True), e) + if observed.lower() in YAML_BOOLS[False]: + return update_values_from_list(set_nested_value(d, e.path, False), e) + return update_values_from_list(set_nested_value(d, e_path, observed[0]), e) - elif expected == "a list": + if expected == "a list": return update_values_from_list(set_nested_value(d, e.path, [observed]), e) - else: - raise e + raise e return d def _replace_changed_values(d, nested_key, replacement_list): - """Helper function to replace values changed from C-PAC 1.7 to C-PAC 1.8. + """Replace values changed from C-PAC 1.7 to C-PAC 1.8. Parameters ---------- @@ -2758,8 +2528,7 @@ def _replace_changed_values(d, nested_key, replacement_list): def _replace_in_value_list(current_value, replacement_tuple): - """Helper function to make character replacements in - `current_value` and drop falsy values. + """Make character replacements and drop falsy values. 
Parameters ---------- diff --git a/CPAC/utils/versioning/dependencies.py b/CPAC/utils/versioning/dependencies.py index 48237651fc..d2921de3fe 100644 --- a/CPAC/utils/versioning/dependencies.py +++ b/CPAC/utils/versioning/dependencies.py @@ -20,7 +20,7 @@ except ModuleNotFoundError: from importlib_metadata import distributions from pathlib import Path -from subprocess import PIPE, STDOUT, Popen +from subprocess import PIPE, Popen, STDOUT import sys __all__ = ["PYTHON_PACKAGES", "REPORTED", "REQUIREMENTS"] diff --git a/CPAC/vmhc/tests/test_vmhc.py b/CPAC/vmhc/tests/test_vmhc.py index bcdc109d9f..2471a9b02c 100644 --- a/CPAC/vmhc/tests/test_vmhc.py +++ b/CPAC/vmhc/tests/test_vmhc.py @@ -1,11 +1,32 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +from logging import basicConfig, INFO import os import pytest from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.monitoring.custom_logging import getLogger from CPAC.utils.test_mocks import configuration_strategy_mock from CPAC.vmhc.vmhc import vmhc as create_vmhc +logger = getLogger("CPAC.utils.tests") +basicConfig(format="%(message)s", level=INFO) + @pytest.mark.skip(reason="test needs refactoring") def test_vmhc_ants(): @@ -22,14 +43,15 @@ def test_vmhc_ants(): "crashdump_dir": os.path.abspath(pipeline_config.crashLogDirectory), } - strat.get_nodes_names() + nodes = strat.get_nodes_names() + logger.info("nodes %s", nodes) workflow, strat = create_vmhc( workflow, num_strat, strat, pipeline_config, - output_name="vmhc_{0}".format(num_strat), + output_name=f"vmhc_{num_strat}", ) workflow.run() diff --git a/Dockerfile b/Dockerfile index f9ced12d7b..57d737962c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -28,9 +28,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - chmod +x /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run-with-freesurfer.sh"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + chmod +x /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run-with-freesurfer.sh"] # link libraries & clean up # link libraries & clean up diff --git a/dev/circleci_data/generate_run_command.py b/dev/circleci_data/generate_run_command.py index ef3153a45c..91fd8045b3 100644 --- a/dev/circleci_data/generate_run_command.py +++ b/dev/circleci_data/generate_run_command.py @@ -1,3 +1,14 @@ +#!/bin/bash +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public License along with C-PAC. If not, see . +"""Generate a run command for testing C-PAC on CircleCI.""" import os import random from warnings import warn @@ -7,7 +18,7 @@ def get_random_subject(species="human"): """ - Function to get a random data config file and subject for a given species. + Get a random data config file and subject for a given species. Note: only human data are configured at the moment @@ -24,12 +35,11 @@ def get_random_subject(species="human"): """ if species == "human": data_config_file = ( - "CPAC/resources/configs/test_configs/" "data-test_4-projects_5-subjects.yml" + "CPAC/resources/configs/test_configs/data-test_4-projects_5-subjects.yml" ) else: - raise NotImplementedError( - f"Data configurations not yet set for random test of {species}" - ) + msg = f"Data configurations not yet set for random test of {species}" + raise NotImplementedError(msg) with open(data_config_file, "r") as data_config: subject_list = yaml.safe_load(data_config) return (data_config_file, random.randrange(len(subject_list))) @@ -37,8 +47,7 @@ def get_random_subject(species="human"): def get_random_test_run_command(): """ - Function to choose a random preconfig, an appropriate subject, and - return a string command to pass to coverage_run.sh. + Choose a random preconfig, an appropriate subject, and return a string command to pass to coverage_run.sh. Parameters ---------- @@ -76,7 +85,7 @@ def get_random_test_run_command(): data_config_file, participant_ndx = get_random_subject(data_species) command = " ".join( [ - "python -m coverage run /code/dev/docker_data/run.py", + "python -m coverage run /code/CPAC/endpoints/run.py", "/home/circleci/project", "/home/circleci/project/outputs participant", f"--save_working_dir --data_config_file {data_config_file}", diff --git a/dev/circleci_data/python_2_pickle.pkl b/dev/circleci_data/python_2_pickle.pkl index 58f8bd0d02..149df4dd98 100644 --- a/dev/circleci_data/python_2_pickle.pkl +++ b/dev/circleci_data/python_2_pickle.pkl @@ -92,4 +92,4 @@ sS'scanList' p46 S'None' p47 -sa. +sa. \ No newline at end of file diff --git a/dev/circleci_data/test_external_utils.py b/dev/circleci_data/test_external_utils.py index e142ad161e..d9144f74a4 100644 --- a/dev/circleci_data/test_external_utils.py +++ b/dev/circleci_data/test_external_utils.py @@ -1,3 +1,21 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Tests for CLI utilities.""" +from logging import INFO import os from pathlib import Path import sys @@ -10,7 +28,7 @@ sys.path.append(CPAC_DIR) DATA_DIR = os.path.join(CPAC_DIR, "dev", "circleci_data") -from CPAC.__main__ import utils as CPAC_main_utils +from CPAC.__main__ import utils as CPAC_main_utils # noqa: E402 # pylint: disable=wrong-import-position @@ -42,10 +60,13 @@ def _resolve_alias(command, key): @pytest.mark.parametrize("multiword_connector", ["-", "_"]) -def test_build_data_config(cli_runner, multiword_connector): - """Test CLI ``utils data-config new-settings-template`` and - ``utils data_config new_settings_template``. +def test_build_data_config(caplog, cli_runner, multiword_connector): """ + Test CLI ``utils data-config new-settings-template``... + + ...and ``utils data_config new_settings_template``. + """ + caplog.set_level(INFO) if multiword_connector == "-" and _BACKPORT_CLICK: return os.chdir(DATA_DIR) @@ -64,14 +85,16 @@ def test_build_data_config(cli_runner, multiword_connector): ) assert result.exit_code == 0 - assert result.output.startswith( + assert "\n".join(caplog.messages).startswith( "\nGenerated a default data_settings YAML file for editing" ) assert os.path.exists(test_yaml) _delete_test_yaml(test_yaml) -def test_new_settings_template(cli_runner): +def test_new_settings_template(caplog, cli_runner): + """Test CLI ``utils new-settings-template``.""" + caplog.set_level(INFO) os.chdir(CPAC_DIR) example_dir = os.path.join(CPAC_DIR, "bids-examples") @@ -93,14 +116,16 @@ def test_new_settings_template(cli_runner): group_yaml = os.path.join(DATA_DIR, "group_analysis_participants_ds051.txt") assert result.exit_code == 0 - assert result.output.startswith("\nGenerating data configuration file..") + assert "\n".join(caplog.messages).startswith( + "\nGenerating data configuration file.." + ) assert os.path.exists(participant_yaml) assert os.path.exists(group_yaml) _delete_test_yaml(participant_yaml) _delete_test_yaml(group_yaml) -def test_repickle(cli_runner): +def test_repickle(cli_runner): # noqa fn = "python_2_pickle.pkl" pickle_path = os.path.join(DATA_DIR, fn) backups = [_Backup(pickle_path), _Backup(f"{pickle_path}z")] diff --git a/dev/circleci_data/test_install.py b/dev/circleci_data/test_install.py index 4c1e7b68ff..db359075bc 100644 --- a/dev/circleci_data/test_install.py +++ b/dev/circleci_data/test_install.py @@ -14,7 +14,8 @@ def test_AFNI_libraries(): if (d.endswith(".simg") or d.endswith(".sif")) ) except: - raise Exception("Singularity image not in expected location.") + msg = "Singularity image not in expected location." + raise Exception(msg) if os.path.exists(SINGULARITY_IMAGE_PATH): afni_libraries = Client.execute( Client.instance(SINGULARITY_IMAGE_PATH), diff --git a/variant-ABCD-HCP.Dockerfile b/variant-ABCD-HCP.Dockerfile index 2a5760ebb7..d62a0a02a2 100644 --- a/variant-ABCD-HCP.Dockerfile +++ b/variant-ABCD-HCP.Dockerfile @@ -1,3 +1,13 @@ +#!/bin/bash +# Copyright (C) 2022-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public License along with C-PAC. If not, see . FROM ghcr.io/fcp-indi/c-pac/stage-base:abcd-hcp-v1.8.7.dev1 LABEL org.opencontainers.image.description "Full C-PAC image with software dependencies version-matched to [ABCD-HCP BIDS fMRI Pipeline](https://github.com/DCAN-Labs/abcd-hcp-pipeline/blob/e480a8f99534f1b05f37bf44c64827384b69b383/Dockerfile)" LABEL org.opencontainers.image.source https://github.com/FCP-INDI/C-PAC @@ -12,9 +22,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - chmod +x /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run-with-freesurfer.sh"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + chmod +x /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run-with-freesurfer.sh"] # Link libraries for Singularity images RUN ldconfig \ diff --git a/variant-fMRIPrep-LTS.Dockerfile b/variant-fMRIPrep-LTS.Dockerfile index 0e9cd3d899..e63f278cb8 100644 --- a/variant-fMRIPrep-LTS.Dockerfile +++ b/variant-fMRIPrep-LTS.Dockerfile @@ -1,3 +1,13 @@ +#!/bin/bash +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public License along with C-PAC. If not, see . FROM ghcr.io/fcp-indi/c-pac/stage-base:fmriprep-lts-v1.8.7.dev1 LABEL org.opencontainers.image.description "Full C-PAC image with software dependencies version-matched to [fMRIPrep LTS](https://reproducibility.stanford.edu/fmriprep-lts#long-term-support-lts)" LABEL org.opencontainers.image.source https://github.com/FCP-INDI/C-PAC @@ -12,9 +22,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - chmod +x /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run-with-freesurfer.sh"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + chmod +x /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run-with-freesurfer.sh"] # link libraries & clean up RUN sed -i 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen \ diff --git a/variant-lite.Dockerfile b/variant-lite.Dockerfile index 1f6f3a9ec9..98280b0b80 100644 --- a/variant-lite.Dockerfile +++ b/variant-lite.Dockerfile @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2023 C-PAC Developers +# Copyright (C) 2022-2024 C-PAC Developers # This file is part of C-PAC. @@ -29,9 +29,9 @@ COPY dev/docker_data /code/docker_data RUN rm -Rf /code/docker_data/checksum && \ mv /code/docker_data/* /code && \ rm -Rf /code/docker_data && \ - chmod +x /code/run.py && \ - rm -Rf /code/run-with-freesurfer.sh -ENTRYPOINT ["/code/run.py"] + chmod +x /code/CPAC/_entrypoints/run.py && \ + rm -Rf /code/CPAC/_entrypoints/run-with-freesurfer.sh +ENTRYPOINT ["/code/CPAC/_entrypoints/run.py"] # link libraries & clean up # link libraries & clean up