🚨 Lint CWAS files included in test suite

FCP-INDI · Feb 6, 2024 · 4650f4f · 4650f4f
1 parent de50ef3
commit 4650f4f
Show file tree

Hide file tree

Showing 4 changed files with 119 additions and 29 deletions.
diff --git a/.ruff.toml b/.ruff.toml
@@ -4,7 +4,7 @@ extend-select = ["A", "C4", "D", "EM", "F541", "G", "I", "ICN", "NPY", "PL", "RE
 target-version = "py37"
 
 [lint]
-external = ["T20"]
+external = ["T20"]  # Don't autoremove 'noqa` comments for these rules
 
 [lint.flake8-import-conventions.extend-aliases]
 "CPAC.pipeline.cpac_group_runner" = "cgr"

diff --git a/CPAC/cwas/cwas.py b/CPAC/cwas/cwas.py
@@ -1,3 +1,20 @@
+# Copyright (C) 2012-2024  C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""CWAS module for CPAC."""
 import os
 
 import numpy as np
@@ -15,8 +32,9 @@
 
 
 def joint_mask(subjects, mask_file=None):
-    """
-    Creates a joint mask (intersection) common to all the subjects in a provided list
+    """Create a joint mask.
+
+    A joint mask is an intersection common to all the subjects in a provided list
     and a provided mask.
 
     Parameters
@@ -42,12 +60,14 @@ def joint_mask(subjects, mask_file=None):
 
 
 def calc_mdmrs(D, regressor, cols, permutations):
+    """Calculate pseudo-F values and significance probabilities."""
     cols = np.array(cols, dtype=np.int32)
     F_set, p_set = mdmr(D, regressor, cols, permutations)
     return F_set, p_set
 
 
 def calc_subdists(subjects_data, voxel_range):
+    """Calculate the subdistributions of the subjects data."""
     subjects, voxels, _ = subjects_data.shape
     D = np.zeros((len(voxel_range), subjects, subjects))
     for i, v in enumerate(voxel_range):
@@ -64,12 +84,14 @@ def calc_subdists(subjects_data, voxel_range):
 def calc_cwas(
     subjects_data, regressor, regressor_selected_cols, permutations, voxel_range
 ):
+    """Calculate CWAS pseudo-F values and significance probabilities."""
     D = calc_subdists(subjects_data, voxel_range)
     F_set, p_set = calc_mdmrs(D, regressor, regressor_selected_cols, permutations)
     return F_set, p_set
 
 
 def pval_to_zval(p_set, permu):
+    """Convert p-values to z-values."""
     inv_pval = 1 - p_set
     zvals = t.ppf(inv_pval, (len(p_set) - 1))
     zvals[zvals == -inf] = permu / (permu + 1)
@@ -86,8 +108,7 @@ def nifti_cwas(
     permutations,
     voxel_range,
 ):
-    """
-    Performs CWAS for a group of subjects.
+    """Perform CWAS for a group of subjects.
 
     Parameters
     ----------
@@ -120,7 +141,7 @@ def nifti_cwas(
         regressor_data = pd.read_table(
             regressor_file, sep=None, engine="python", dtype={participant_column: str}
         )
-    except:
+    except (KeyError, OSError, pd.errors.ParserError, ValueError):
         regressor_data = pd.read_table(regressor_file, sep=None, engine="python")
         regressor_data = regressor_data.astype({participant_column: str})
 
@@ -167,7 +188,7 @@ def nifti_cwas(
     )
     if len(regressor.shape) == 1:
         regressor = regressor[:, np.newaxis]
-    elif len(regressor.shape) != 2:
+    elif len(regressor.shape) != 2:  # noqa: PLR2004
         raise ValueError("Bad regressor shape: %s" % str(regressor.shape))
     if len(subject_files) != regressor.shape[0]:
         msg = "Number of subjects does not match regressor size"
@@ -195,19 +216,22 @@ def nifti_cwas(
 
 
 def create_cwas_batches(mask_file, batches):
+    """Create batches of voxels to process in parallel."""
     mask = nib.load(mask_file).get_fdata().astype("bool")
     voxels = mask.sum(dtype=int)
     return np.array_split(np.arange(voxels), batches)
 
 
 def volumize(mask_image, data):
+    """Create a volume from a mask and data."""
     mask_data = mask_image.get_fdata().astype("bool")
     volume = np.zeros_like(mask_data, dtype=data.dtype)
-    volume[np.where(mask_data is True)] = data
+    volume[mask_data] = data
     return nib.Nifti1Image(volume, header=mask_image.header, affine=mask_image.affine)
 
 
 def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations):
+    """Merge CWAS batches into a single volume."""
     _, _, voxel_range = zip(*cwas_batches)
     voxels = np.array(np.concatenate(voxel_range))
 
@@ -248,6 +272,7 @@ def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations):
 
 
 def zstat_image(zvals, mask_file):
+    """Create a zstat image from zvals and mask_file."""
     mask_image = nib.load(mask_file)
 
     z_vol = volumize(mask_image, zvals)

diff --git a/CPAC/cwas/tests/test_cwas.py b/CPAC/cwas/tests/test_cwas.py
@@ -1,23 +1,50 @@
+# Copyright (C) 2012-2024  C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Test the CWAS pipeline."""
+from logging import basicConfig, INFO
+
 import pytest
+import nibabel as nib
 
 from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin
+from CPAC.utils.monitoring.custom_logging import getLogger
+
+logger = getLogger("CPAC.cwas.tests")
+basicConfig(format="%(message)s", level=INFO)
 
 
 @pytest.mark.skip(reason="requires RegressionTester")
 def test_adhd04():
+    """Test CWAS with ADHD04 data."""
     rt = RegressionTester("adhd04", "diagnosis", "diagnosis")
     rt.run()
 
 
 @pytest.mark.skip(reason="requires RegressionTester")
 def test_adhd40():
+    """Test CWAS with ADHD40 data."""
     rt = RegressionTester("adhd40", "diagnosis", "diagnosis + age + sex + meanFD")
     rt.run()
 
 
 @pytest.mark.skip(reason="requires specific local directory")
 class RegressionTester(object):
-    """
+    """Test the CWAS pipeline in Python and R, and compare the results.
+
     tmp = RegressionTester('adhd04', 'diagnosis', 'diagnosis')
     tmp.run().
     """
@@ -36,13 +63,18 @@ def __init__(
         self.formula = formula
 
     def run(self):
+        """Run the CWAS pipeline in Python and R, and compare the results."""
+        logger.info("Python-Based CWAS")
         self.run_cwas()
 
+        logger.info("R-Based CWAS")
         self.run_connectir()
 
+        logger.info("Compare Python and R")
         self.compare_py_vs_r()
 
     def run_cwas(self):
+        """Run the CWAS pipeline in Python."""
         import os
 
         os.chdir("%s/C-PAC" % self.base)
@@ -80,10 +112,8 @@ def run_cwas(self):
         ###
 
         # Read in list of subject functionals
-        subjects_list = [
-            l.strip().strip('"')
-            for l in open(sfile).readlines()  # pylint: disable=consider-using-with
-        ]
+        with open(sfile) as _f:
+            subjects_list = [l.strip().strip('"') for l in _f.readlines()]  # noqa: E741
 
         # Read in design/regressor file
         regressors = np.loadtxt(rfile)
@@ -111,21 +141,23 @@ def run_cwas(self):
         #     pass
 
         # Run it!
-        time.clock()
+        start = time.clock()
         plugin_args = {"n_procs": 4}
         c.run(plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args)
-        time.clock()
+        end = time.clock()
+        logger.info("time: %.2gs", end - start)
 
     def run_connectir(self):
-        """
-        This runs distances and MDMR with connectir.
+        """Distances and MDMR with connectir.
+
         This should be run after run_cwas().
         """
         import os
         import time
 
-        time.clock()
+        start = time.clock()
 
+        logger.info("Subject Distances")
         cmd = (
             "connectir_subdist.R --infuncs1 %(basedir)s/configs/"
             "%(outbase)s_funcpaths_4mm.txt --brainmask1 %(basedir)s/"
@@ -134,8 +166,10 @@ def run_connectir(self):
             "--forks 1 --threads 12 %(basedir)s/results_%(outbase)s.r"
             % {"basedir": self.base, "outbase": self.name}
         )
+        logger.info("RUNNING: %s", cmd)
         os.system(cmd)
 
+        logger.info("MDMR")
         cmd = (
             "connectir_mdmr.R --indir %(basedir)s/results_%(outbase)s.r "
             "--formula '%(formula)s' --model "
@@ -150,13 +184,15 @@ def run_connectir(self):
                 "nperms": 1000,
             }
         )
+        logger.info("RUNNING: %s", cmd)
         os.system(cmd)
 
-        time.clock()
+        end = time.clock()
+        logger.info("time: %.2gs", end - start)
 
     @pytest.mark.skip(reason="No R installation in C-PAC image")
     def compare_py_vs_r(self):
-        """This will compare the output from the CPAC python vs the R connectir."""
+        """Compare the output from the CPAC python vs the R connectir."""
         import os
 
         os.chdir("%s/C-PAC" % self.base)
@@ -239,18 +275,20 @@ def compare_py_vs_r(self):
         comp = np.allclose(py_hat, r_hat)
         assert_that(comp, "regressors as hat matrices")
 
-        comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99
+        comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99  # noqa: PLR2004
         assert_that(comp, "Fstats similarity")
 
-        comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99
+        comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99  # noqa: PLR2004
         assert_that(comp, "p-values similarity ")
 
-        comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01
+        comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01  # noqa: PLR2004
         assert_that(comp, "Fstats difference")
 
-        comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05
+        comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05  # noqa: PLR2004
         assert_that(comp, "p-values difference")
 
+        logger.info("tests were all good")
+
 
 def test_cwas_connectir():
     # add the code to run the same cwas with connectir
@@ -302,15 +340,18 @@ def test_distances():
     mask_file = op.join(sdir, "mask.nii.gz")
 
     sfile = "/home2/data/Projects/CWAS/share/nki/subinfo/40_Set1_N104/short_compcor_funcpaths_4mm_smoothed.txt"
-    subjects_list = [l.strip().strip('"') for l in open(sfile).readlines()]
+    subjects_list = [
+        l.strip().strip('"')
+        for l in open(sfile).readlines()  # noqa: E741
+    ]
     subjects_list = subjects_list[:n]
     subjects_file_list = subjects_list
 
-    mask = nb.load(mask_file).get_fdata().astype("bool")
+    mask = nib.load(mask_file).get_fdata().astype("bool")
     mask_indices = np.where(mask)
 
     subjects_data = [
-        nb.load(subject_file).get_fdata().astype("float64")[mask_indices].T
+        nib.load(subject_file).get_fdata().astype("float64")[mask_indices].T
         for subject_file in subjects_file_list
     ]
     subjects_data = np.array(subjects_data)

diff --git a/CPAC/cwas/tests/test_pipeline_cwas.py b/CPAC/cwas/tests/test_pipeline_cwas.py
@@ -1,3 +1,21 @@
+# Copyright (C) 2021-2024  C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Test the CWAS pipeline."""
+from logging import basicConfig, INFO
 import os
 from urllib.error import URLError
 
@@ -8,18 +26,24 @@
 import nilearn.datasets
 
 from CPAC.cwas.pipeline import create_cwas
+from CPAC.utils.monitoring.custom_logging import getLogger
+
+logger = getLogger("CPAC.cwas.tests")
+basicConfig(format="%(message)s", level=INFO)
 
 
 @pytest.mark.parametrize("z_score", [[0], [1], [0, 1], []])
 def test_pipeline(z_score):
+    """Test the CWAS pipeline with z-score forking options."""
     try:
         # pylint: disable=invalid-name
         cc = nilearn.datasets.fetch_atlas_craddock_2012()
     except URLError:
+        logger.info("Could not fetch atlas, skipping test")
         return
     try:
         os.mkdir("/tmp/cwas")
-    except:
+    except:  # noqa: E722
         pass
 
     abide_data = nilearn.datasets.fetch_abide_pcp(n_subjects=10)
@@ -34,7 +58,7 @@ def test_pipeline(z_score):
     assert all(FID in images[i] for i, FID in enumerate(pheno.FILE_ID))
     img = nib.load(cc["scorr_mean"])
     img_data = np.copy(img.get_fdata()[:, :, :, 10])
-    img_data[img_data != 2] = 0.0
+    img_data[img_data != 2] = 0.0  # noqa: PLR2004
     img = nib.Nifti1Image(img_data, img.affine)
     nib.save(img, "/tmp/cwas/roi.nii.gz")