diff --git a/.ruff.toml b/.ruff.toml
index 18cd0008c2..cd7ebeedf3 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -4,7 +4,7 @@ extend-select = ["A", "C4", "D", "EM", "F541", "G", "I", "ICN", "NPY", "PL", "RE
target-version = "py37"
[lint]
-external = ["T20"]
+external = ["T20"] # Don't autoremove 'noqa` comments for these rules
[lint.flake8-import-conventions.extend-aliases]
"CPAC.pipeline.cpac_group_runner" = "cgr"
diff --git a/CPAC/cwas/cwas.py b/CPAC/cwas/cwas.py
index 616af48710..23c73755ba 100644
--- a/CPAC/cwas/cwas.py
+++ b/CPAC/cwas/cwas.py
@@ -1,3 +1,20 @@
+# Copyright (C) 2012-2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see .
+"""CWAS module for CPAC."""
import os
import numpy as np
@@ -15,8 +32,9 @@
def joint_mask(subjects, mask_file=None):
- """
- Creates a joint mask (intersection) common to all the subjects in a provided list
+ """Create a joint mask.
+
+ A joint mask is an intersection common to all the subjects in a provided list
and a provided mask.
Parameters
@@ -42,12 +60,14 @@ def joint_mask(subjects, mask_file=None):
def calc_mdmrs(D, regressor, cols, permutations):
+ """Calculate pseudo-F values and significance probabilities."""
cols = np.array(cols, dtype=np.int32)
F_set, p_set = mdmr(D, regressor, cols, permutations)
return F_set, p_set
def calc_subdists(subjects_data, voxel_range):
+ """Calculate the subdistributions of the subjects data."""
subjects, voxels, _ = subjects_data.shape
D = np.zeros((len(voxel_range), subjects, subjects))
for i, v in enumerate(voxel_range):
@@ -64,12 +84,14 @@ def calc_subdists(subjects_data, voxel_range):
def calc_cwas(
subjects_data, regressor, regressor_selected_cols, permutations, voxel_range
):
+ """Calculate CWAS pseudo-F values and significance probabilities."""
D = calc_subdists(subjects_data, voxel_range)
F_set, p_set = calc_mdmrs(D, regressor, regressor_selected_cols, permutations)
return F_set, p_set
def pval_to_zval(p_set, permu):
+ """Convert p-values to z-values."""
inv_pval = 1 - p_set
zvals = t.ppf(inv_pval, (len(p_set) - 1))
zvals[zvals == -inf] = permu / (permu + 1)
@@ -86,8 +108,7 @@ def nifti_cwas(
permutations,
voxel_range,
):
- """
- Performs CWAS for a group of subjects.
+ """Perform CWAS for a group of subjects.
Parameters
----------
@@ -120,7 +141,7 @@ def nifti_cwas(
regressor_data = pd.read_table(
regressor_file, sep=None, engine="python", dtype={participant_column: str}
)
- except:
+ except (KeyError, OSError, pd.errors.ParserError, ValueError):
regressor_data = pd.read_table(regressor_file, sep=None, engine="python")
regressor_data = regressor_data.astype({participant_column: str})
@@ -167,7 +188,7 @@ def nifti_cwas(
)
if len(regressor.shape) == 1:
regressor = regressor[:, np.newaxis]
- elif len(regressor.shape) != 2:
+ elif len(regressor.shape) != 2: # noqa: PLR2004
raise ValueError("Bad regressor shape: %s" % str(regressor.shape))
if len(subject_files) != regressor.shape[0]:
msg = "Number of subjects does not match regressor size"
@@ -195,19 +216,22 @@ def nifti_cwas(
def create_cwas_batches(mask_file, batches):
+ """Create batches of voxels to process in parallel."""
mask = nib.load(mask_file).get_fdata().astype("bool")
voxels = mask.sum(dtype=int)
return np.array_split(np.arange(voxels), batches)
def volumize(mask_image, data):
+ """Create a volume from a mask and data."""
mask_data = mask_image.get_fdata().astype("bool")
volume = np.zeros_like(mask_data, dtype=data.dtype)
- volume[np.where(mask_data is True)] = data
+ volume[mask_data] = data
return nib.Nifti1Image(volume, header=mask_image.header, affine=mask_image.affine)
def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations):
+ """Merge CWAS batches into a single volume."""
_, _, voxel_range = zip(*cwas_batches)
voxels = np.array(np.concatenate(voxel_range))
@@ -248,6 +272,7 @@ def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations):
def zstat_image(zvals, mask_file):
+ """Create a zstat image from zvals and mask_file."""
mask_image = nib.load(mask_file)
z_vol = volumize(mask_image, zvals)
diff --git a/CPAC/cwas/tests/test_cwas.py b/CPAC/cwas/tests/test_cwas.py
index 2e094bd8b1..f80175ac35 100755
--- a/CPAC/cwas/tests/test_cwas.py
+++ b/CPAC/cwas/tests/test_cwas.py
@@ -1,23 +1,50 @@
+# Copyright (C) 2012-2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see .
+"""Test the CWAS pipeline."""
+from logging import basicConfig, INFO
+
import pytest
+import nibabel as nib
from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin
+from CPAC.utils.monitoring.custom_logging import getLogger
+
+logger = getLogger("CPAC.cwas.tests")
+basicConfig(format="%(message)s", level=INFO)
@pytest.mark.skip(reason="requires RegressionTester")
def test_adhd04():
+ """Test CWAS with ADHD04 data."""
rt = RegressionTester("adhd04", "diagnosis", "diagnosis")
rt.run()
@pytest.mark.skip(reason="requires RegressionTester")
def test_adhd40():
+ """Test CWAS with ADHD40 data."""
rt = RegressionTester("adhd40", "diagnosis", "diagnosis + age + sex + meanFD")
rt.run()
@pytest.mark.skip(reason="requires specific local directory")
class RegressionTester(object):
- """
+ """Test the CWAS pipeline in Python and R, and compare the results.
+
tmp = RegressionTester('adhd04', 'diagnosis', 'diagnosis')
tmp.run().
"""
@@ -36,13 +63,18 @@ def __init__(
self.formula = formula
def run(self):
+ """Run the CWAS pipeline in Python and R, and compare the results."""
+ logger.info("Python-Based CWAS")
self.run_cwas()
+ logger.info("R-Based CWAS")
self.run_connectir()
+ logger.info("Compare Python and R")
self.compare_py_vs_r()
def run_cwas(self):
+ """Run the CWAS pipeline in Python."""
import os
os.chdir("%s/C-PAC" % self.base)
@@ -80,10 +112,8 @@ def run_cwas(self):
###
# Read in list of subject functionals
- subjects_list = [
- l.strip().strip('"')
- for l in open(sfile).readlines() # pylint: disable=consider-using-with
- ]
+ with open(sfile) as _f:
+ subjects_list = [l.strip().strip('"') for l in _f.readlines()] # noqa: E741
# Read in design/regressor file
regressors = np.loadtxt(rfile)
@@ -111,21 +141,23 @@ def run_cwas(self):
# pass
# Run it!
- time.clock()
+ start = time.clock()
plugin_args = {"n_procs": 4}
c.run(plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args)
- time.clock()
+ end = time.clock()
+ logger.info("time: %.2gs", end - start)
def run_connectir(self):
- """
- This runs distances and MDMR with connectir.
+ """Distances and MDMR with connectir.
+
This should be run after run_cwas().
"""
import os
import time
- time.clock()
+ start = time.clock()
+ logger.info("Subject Distances")
cmd = (
"connectir_subdist.R --infuncs1 %(basedir)s/configs/"
"%(outbase)s_funcpaths_4mm.txt --brainmask1 %(basedir)s/"
@@ -134,8 +166,10 @@ def run_connectir(self):
"--forks 1 --threads 12 %(basedir)s/results_%(outbase)s.r"
% {"basedir": self.base, "outbase": self.name}
)
+ logger.info("RUNNING: %s", cmd)
os.system(cmd)
+ logger.info("MDMR")
cmd = (
"connectir_mdmr.R --indir %(basedir)s/results_%(outbase)s.r "
"--formula '%(formula)s' --model "
@@ -150,13 +184,15 @@ def run_connectir(self):
"nperms": 1000,
}
)
+ logger.info("RUNNING: %s", cmd)
os.system(cmd)
- time.clock()
+ end = time.clock()
+ logger.info("time: %.2gs", end - start)
@pytest.mark.skip(reason="No R installation in C-PAC image")
def compare_py_vs_r(self):
- """This will compare the output from the CPAC python vs the R connectir."""
+ """Compare the output from the CPAC python vs the R connectir."""
import os
os.chdir("%s/C-PAC" % self.base)
@@ -239,18 +275,20 @@ def compare_py_vs_r(self):
comp = np.allclose(py_hat, r_hat)
assert_that(comp, "regressors as hat matrices")
- comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99
+ comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004
assert_that(comp, "Fstats similarity")
- comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99
+ comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004
assert_that(comp, "p-values similarity ")
- comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01
+ comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01 # noqa: PLR2004
assert_that(comp, "Fstats difference")
- comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05
+ comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05 # noqa: PLR2004
assert_that(comp, "p-values difference")
+ logger.info("tests were all good")
+
def test_cwas_connectir():
# add the code to run the same cwas with connectir
@@ -302,15 +340,18 @@ def test_distances():
mask_file = op.join(sdir, "mask.nii.gz")
sfile = "/home2/data/Projects/CWAS/share/nki/subinfo/40_Set1_N104/short_compcor_funcpaths_4mm_smoothed.txt"
- subjects_list = [l.strip().strip('"') for l in open(sfile).readlines()]
+ subjects_list = [
+ l.strip().strip('"')
+ for l in open(sfile).readlines() # noqa: E741
+ ]
subjects_list = subjects_list[:n]
subjects_file_list = subjects_list
- mask = nb.load(mask_file).get_fdata().astype("bool")
+ mask = nib.load(mask_file).get_fdata().astype("bool")
mask_indices = np.where(mask)
subjects_data = [
- nb.load(subject_file).get_fdata().astype("float64")[mask_indices].T
+ nib.load(subject_file).get_fdata().astype("float64")[mask_indices].T
for subject_file in subjects_file_list
]
subjects_data = np.array(subjects_data)
diff --git a/CPAC/cwas/tests/test_pipeline_cwas.py b/CPAC/cwas/tests/test_pipeline_cwas.py
index 5db2e377e2..3e1f05ad5d 100644
--- a/CPAC/cwas/tests/test_pipeline_cwas.py
+++ b/CPAC/cwas/tests/test_pipeline_cwas.py
@@ -1,3 +1,21 @@
+# Copyright (C) 2021-2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see .
+"""Test the CWAS pipeline."""
+from logging import basicConfig, INFO
import os
from urllib.error import URLError
@@ -8,18 +26,24 @@
import nilearn.datasets
from CPAC.cwas.pipeline import create_cwas
+from CPAC.utils.monitoring.custom_logging import getLogger
+
+logger = getLogger("CPAC.cwas.tests")
+basicConfig(format="%(message)s", level=INFO)
@pytest.mark.parametrize("z_score", [[0], [1], [0, 1], []])
def test_pipeline(z_score):
+ """Test the CWAS pipeline with z-score forking options."""
try:
# pylint: disable=invalid-name
cc = nilearn.datasets.fetch_atlas_craddock_2012()
except URLError:
+ logger.info("Could not fetch atlas, skipping test")
return
try:
os.mkdir("/tmp/cwas")
- except:
+ except: # noqa: E722
pass
abide_data = nilearn.datasets.fetch_abide_pcp(n_subjects=10)
@@ -34,7 +58,7 @@ def test_pipeline(z_score):
assert all(FID in images[i] for i, FID in enumerate(pheno.FILE_ID))
img = nib.load(cc["scorr_mean"])
img_data = np.copy(img.get_fdata()[:, :, :, 10])
- img_data[img_data != 2] = 0.0
+ img_data[img_data != 2] = 0.0 # noqa: PLR2004
img = nib.Nifti1Image(img_data, img.affine)
nib.save(img, "/tmp/cwas/roi.nii.gz")