diff --git a/.ruff.toml b/.ruff.toml index 18cd0008c2..cd7ebeedf3 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -4,7 +4,7 @@ extend-select = ["A", "C4", "D", "EM", "F541", "G", "I", "ICN", "NPY", "PL", "RE target-version = "py37" [lint] -external = ["T20"] +external = ["T20"] # Don't autoremove 'noqa` comments for these rules [lint.flake8-import-conventions.extend-aliases] "CPAC.pipeline.cpac_group_runner" = "cgr" diff --git a/CPAC/cwas/cwas.py b/CPAC/cwas/cwas.py index 616af48710..23c73755ba 100644 --- a/CPAC/cwas/cwas.py +++ b/CPAC/cwas/cwas.py @@ -1,3 +1,20 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""CWAS module for CPAC.""" import os import numpy as np @@ -15,8 +32,9 @@ def joint_mask(subjects, mask_file=None): - """ - Creates a joint mask (intersection) common to all the subjects in a provided list + """Create a joint mask. + + A joint mask is an intersection common to all the subjects in a provided list and a provided mask. Parameters @@ -42,12 +60,14 @@ def joint_mask(subjects, mask_file=None): def calc_mdmrs(D, regressor, cols, permutations): + """Calculate pseudo-F values and significance probabilities.""" cols = np.array(cols, dtype=np.int32) F_set, p_set = mdmr(D, regressor, cols, permutations) return F_set, p_set def calc_subdists(subjects_data, voxel_range): + """Calculate the subdistributions of the subjects data.""" subjects, voxels, _ = subjects_data.shape D = np.zeros((len(voxel_range), subjects, subjects)) for i, v in enumerate(voxel_range): @@ -64,12 +84,14 @@ def calc_subdists(subjects_data, voxel_range): def calc_cwas( subjects_data, regressor, regressor_selected_cols, permutations, voxel_range ): + """Calculate CWAS pseudo-F values and significance probabilities.""" D = calc_subdists(subjects_data, voxel_range) F_set, p_set = calc_mdmrs(D, regressor, regressor_selected_cols, permutations) return F_set, p_set def pval_to_zval(p_set, permu): + """Convert p-values to z-values.""" inv_pval = 1 - p_set zvals = t.ppf(inv_pval, (len(p_set) - 1)) zvals[zvals == -inf] = permu / (permu + 1) @@ -86,8 +108,7 @@ def nifti_cwas( permutations, voxel_range, ): - """ - Performs CWAS for a group of subjects. + """Perform CWAS for a group of subjects. Parameters ---------- @@ -120,7 +141,7 @@ def nifti_cwas( regressor_data = pd.read_table( regressor_file, sep=None, engine="python", dtype={participant_column: str} ) - except: + except (KeyError, OSError, pd.errors.ParserError, ValueError): regressor_data = pd.read_table(regressor_file, sep=None, engine="python") regressor_data = regressor_data.astype({participant_column: str}) @@ -167,7 +188,7 @@ def nifti_cwas( ) if len(regressor.shape) == 1: regressor = regressor[:, np.newaxis] - elif len(regressor.shape) != 2: + elif len(regressor.shape) != 2: # noqa: PLR2004 raise ValueError("Bad regressor shape: %s" % str(regressor.shape)) if len(subject_files) != regressor.shape[0]: msg = "Number of subjects does not match regressor size" @@ -195,19 +216,22 @@ def nifti_cwas( def create_cwas_batches(mask_file, batches): + """Create batches of voxels to process in parallel.""" mask = nib.load(mask_file).get_fdata().astype("bool") voxels = mask.sum(dtype=int) return np.array_split(np.arange(voxels), batches) def volumize(mask_image, data): + """Create a volume from a mask and data.""" mask_data = mask_image.get_fdata().astype("bool") volume = np.zeros_like(mask_data, dtype=data.dtype) - volume[np.where(mask_data is True)] = data + volume[mask_data] = data return nib.Nifti1Image(volume, header=mask_image.header, affine=mask_image.affine) def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations): + """Merge CWAS batches into a single volume.""" _, _, voxel_range = zip(*cwas_batches) voxels = np.array(np.concatenate(voxel_range)) @@ -248,6 +272,7 @@ def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations): def zstat_image(zvals, mask_file): + """Create a zstat image from zvals and mask_file.""" mask_image = nib.load(mask_file) z_vol = volumize(mask_image, zvals) diff --git a/CPAC/cwas/tests/test_cwas.py b/CPAC/cwas/tests/test_cwas.py index 2e094bd8b1..f80175ac35 100755 --- a/CPAC/cwas/tests/test_cwas.py +++ b/CPAC/cwas/tests/test_cwas.py @@ -1,23 +1,50 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Test the CWAS pipeline.""" +from logging import basicConfig, INFO + import pytest +import nibabel as nib from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC.cwas.tests") +basicConfig(format="%(message)s", level=INFO) @pytest.mark.skip(reason="requires RegressionTester") def test_adhd04(): + """Test CWAS with ADHD04 data.""" rt = RegressionTester("adhd04", "diagnosis", "diagnosis") rt.run() @pytest.mark.skip(reason="requires RegressionTester") def test_adhd40(): + """Test CWAS with ADHD40 data.""" rt = RegressionTester("adhd40", "diagnosis", "diagnosis + age + sex + meanFD") rt.run() @pytest.mark.skip(reason="requires specific local directory") class RegressionTester(object): - """ + """Test the CWAS pipeline in Python and R, and compare the results. + tmp = RegressionTester('adhd04', 'diagnosis', 'diagnosis') tmp.run(). """ @@ -36,13 +63,18 @@ def __init__( self.formula = formula def run(self): + """Run the CWAS pipeline in Python and R, and compare the results.""" + logger.info("Python-Based CWAS") self.run_cwas() + logger.info("R-Based CWAS") self.run_connectir() + logger.info("Compare Python and R") self.compare_py_vs_r() def run_cwas(self): + """Run the CWAS pipeline in Python.""" import os os.chdir("%s/C-PAC" % self.base) @@ -80,10 +112,8 @@ def run_cwas(self): ### # Read in list of subject functionals - subjects_list = [ - l.strip().strip('"') - for l in open(sfile).readlines() # pylint: disable=consider-using-with - ] + with open(sfile) as _f: + subjects_list = [l.strip().strip('"') for l in _f.readlines()] # noqa: E741 # Read in design/regressor file regressors = np.loadtxt(rfile) @@ -111,21 +141,23 @@ def run_cwas(self): # pass # Run it! - time.clock() + start = time.clock() plugin_args = {"n_procs": 4} c.run(plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args) - time.clock() + end = time.clock() + logger.info("time: %.2gs", end - start) def run_connectir(self): - """ - This runs distances and MDMR with connectir. + """Distances and MDMR with connectir. + This should be run after run_cwas(). """ import os import time - time.clock() + start = time.clock() + logger.info("Subject Distances") cmd = ( "connectir_subdist.R --infuncs1 %(basedir)s/configs/" "%(outbase)s_funcpaths_4mm.txt --brainmask1 %(basedir)s/" @@ -134,8 +166,10 @@ def run_connectir(self): "--forks 1 --threads 12 %(basedir)s/results_%(outbase)s.r" % {"basedir": self.base, "outbase": self.name} ) + logger.info("RUNNING: %s", cmd) os.system(cmd) + logger.info("MDMR") cmd = ( "connectir_mdmr.R --indir %(basedir)s/results_%(outbase)s.r " "--formula '%(formula)s' --model " @@ -150,13 +184,15 @@ def run_connectir(self): "nperms": 1000, } ) + logger.info("RUNNING: %s", cmd) os.system(cmd) - time.clock() + end = time.clock() + logger.info("time: %.2gs", end - start) @pytest.mark.skip(reason="No R installation in C-PAC image") def compare_py_vs_r(self): - """This will compare the output from the CPAC python vs the R connectir.""" + """Compare the output from the CPAC python vs the R connectir.""" import os os.chdir("%s/C-PAC" % self.base) @@ -239,18 +275,20 @@ def compare_py_vs_r(self): comp = np.allclose(py_hat, r_hat) assert_that(comp, "regressors as hat matrices") - comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99 + comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004 assert_that(comp, "Fstats similarity") - comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99 + comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004 assert_that(comp, "p-values similarity ") - comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01 + comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01 # noqa: PLR2004 assert_that(comp, "Fstats difference") - comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05 + comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05 # noqa: PLR2004 assert_that(comp, "p-values difference") + logger.info("tests were all good") + def test_cwas_connectir(): # add the code to run the same cwas with connectir @@ -302,15 +340,18 @@ def test_distances(): mask_file = op.join(sdir, "mask.nii.gz") sfile = "/home2/data/Projects/CWAS/share/nki/subinfo/40_Set1_N104/short_compcor_funcpaths_4mm_smoothed.txt" - subjects_list = [l.strip().strip('"') for l in open(sfile).readlines()] + subjects_list = [ + l.strip().strip('"') + for l in open(sfile).readlines() # noqa: E741 + ] subjects_list = subjects_list[:n] subjects_file_list = subjects_list - mask = nb.load(mask_file).get_fdata().astype("bool") + mask = nib.load(mask_file).get_fdata().astype("bool") mask_indices = np.where(mask) subjects_data = [ - nb.load(subject_file).get_fdata().astype("float64")[mask_indices].T + nib.load(subject_file).get_fdata().astype("float64")[mask_indices].T for subject_file in subjects_file_list ] subjects_data = np.array(subjects_data) diff --git a/CPAC/cwas/tests/test_pipeline_cwas.py b/CPAC/cwas/tests/test_pipeline_cwas.py index 5db2e377e2..3e1f05ad5d 100644 --- a/CPAC/cwas/tests/test_pipeline_cwas.py +++ b/CPAC/cwas/tests/test_pipeline_cwas.py @@ -1,3 +1,21 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Test the CWAS pipeline.""" +from logging import basicConfig, INFO import os from urllib.error import URLError @@ -8,18 +26,24 @@ import nilearn.datasets from CPAC.cwas.pipeline import create_cwas +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("CPAC.cwas.tests") +basicConfig(format="%(message)s", level=INFO) @pytest.mark.parametrize("z_score", [[0], [1], [0, 1], []]) def test_pipeline(z_score): + """Test the CWAS pipeline with z-score forking options.""" try: # pylint: disable=invalid-name cc = nilearn.datasets.fetch_atlas_craddock_2012() except URLError: + logger.info("Could not fetch atlas, skipping test") return try: os.mkdir("/tmp/cwas") - except: + except: # noqa: E722 pass abide_data = nilearn.datasets.fetch_abide_pcp(n_subjects=10) @@ -34,7 +58,7 @@ def test_pipeline(z_score): assert all(FID in images[i] for i, FID in enumerate(pheno.FILE_ID)) img = nib.load(cc["scorr_mean"]) img_data = np.copy(img.get_fdata()[:, :, :, 10]) - img_data[img_data != 2] = 0.0 + img_data[img_data != 2] = 0.0 # noqa: PLR2004 img = nib.Nifti1Image(img_data, img.affine) nib.save(img, "/tmp/cwas/roi.nii.gz")