Skip to content

Commit

Permalink
🚨 Lint CWAS files included in test suite
Browse files Browse the repository at this point in the history
  • Loading branch information
shnizzedy committed Feb 6, 2024
1 parent de50ef3 commit 4650f4f
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ extend-select = ["A", "C4", "D", "EM", "F541", "G", "I", "ICN", "NPY", "PL", "RE
target-version = "py37"

[lint]
external = ["T20"]
external = ["T20"] # Don't autoremove 'noqa` comments for these rules

[lint.flake8-import-conventions.extend-aliases]
"CPAC.pipeline.cpac_group_runner" = "cgr"
Expand Down
39 changes: 32 additions & 7 deletions CPAC/cwas/cwas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# Copyright (C) 2012-2024 C-PAC Developers

# This file is part of C-PAC.

# C-PAC is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.

# C-PAC is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
"""CWAS module for CPAC."""
import os

import numpy as np
Expand All @@ -15,8 +32,9 @@


def joint_mask(subjects, mask_file=None):
"""
Creates a joint mask (intersection) common to all the subjects in a provided list
"""Create a joint mask.
A joint mask is an intersection common to all the subjects in a provided list
and a provided mask.
Parameters
Expand All @@ -42,12 +60,14 @@ def joint_mask(subjects, mask_file=None):


def calc_mdmrs(D, regressor, cols, permutations):
"""Calculate pseudo-F values and significance probabilities."""
cols = np.array(cols, dtype=np.int32)
F_set, p_set = mdmr(D, regressor, cols, permutations)
return F_set, p_set


def calc_subdists(subjects_data, voxel_range):
"""Calculate the subdistributions of the subjects data."""
subjects, voxels, _ = subjects_data.shape
D = np.zeros((len(voxel_range), subjects, subjects))
for i, v in enumerate(voxel_range):
Expand All @@ -64,12 +84,14 @@ def calc_subdists(subjects_data, voxel_range):
def calc_cwas(
subjects_data, regressor, regressor_selected_cols, permutations, voxel_range
):
"""Calculate CWAS pseudo-F values and significance probabilities."""
D = calc_subdists(subjects_data, voxel_range)
F_set, p_set = calc_mdmrs(D, regressor, regressor_selected_cols, permutations)
return F_set, p_set


def pval_to_zval(p_set, permu):
"""Convert p-values to z-values."""
inv_pval = 1 - p_set
zvals = t.ppf(inv_pval, (len(p_set) - 1))
zvals[zvals == -inf] = permu / (permu + 1)
Expand All @@ -86,8 +108,7 @@ def nifti_cwas(
permutations,
voxel_range,
):
"""
Performs CWAS for a group of subjects.
"""Perform CWAS for a group of subjects.
Parameters
----------
Expand Down Expand Up @@ -120,7 +141,7 @@ def nifti_cwas(
regressor_data = pd.read_table(
regressor_file, sep=None, engine="python", dtype={participant_column: str}
)
except:
except (KeyError, OSError, pd.errors.ParserError, ValueError):
regressor_data = pd.read_table(regressor_file, sep=None, engine="python")
regressor_data = regressor_data.astype({participant_column: str})

Expand Down Expand Up @@ -167,7 +188,7 @@ def nifti_cwas(
)
if len(regressor.shape) == 1:
regressor = regressor[:, np.newaxis]
elif len(regressor.shape) != 2:
elif len(regressor.shape) != 2: # noqa: PLR2004
raise ValueError("Bad regressor shape: %s" % str(regressor.shape))
if len(subject_files) != regressor.shape[0]:
msg = "Number of subjects does not match regressor size"
Expand Down Expand Up @@ -195,19 +216,22 @@ def nifti_cwas(


def create_cwas_batches(mask_file, batches):
"""Create batches of voxels to process in parallel."""
mask = nib.load(mask_file).get_fdata().astype("bool")
voxels = mask.sum(dtype=int)
return np.array_split(np.arange(voxels), batches)


def volumize(mask_image, data):
"""Create a volume from a mask and data."""
mask_data = mask_image.get_fdata().astype("bool")
volume = np.zeros_like(mask_data, dtype=data.dtype)
volume[np.where(mask_data is True)] = data
volume[mask_data] = data
return nib.Nifti1Image(volume, header=mask_image.header, affine=mask_image.affine)


def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations):
"""Merge CWAS batches into a single volume."""
_, _, voxel_range = zip(*cwas_batches)
voxels = np.array(np.concatenate(voxel_range))

Expand Down Expand Up @@ -248,6 +272,7 @@ def merge_cwas_batches(cwas_batches, mask_file, z_score, permutations):


def zstat_image(zvals, mask_file):
"""Create a zstat image from zvals and mask_file."""
mask_image = nib.load(mask_file)

z_vol = volumize(mask_image, zvals)
Expand Down
79 changes: 60 additions & 19 deletions CPAC/cwas/tests/test_cwas.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,50 @@
# Copyright (C) 2012-2024 C-PAC Developers

# This file is part of C-PAC.

# C-PAC is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.

# C-PAC is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
"""Test the CWAS pipeline."""
from logging import basicConfig, INFO

import pytest
import nibabel as nib

from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin
from CPAC.utils.monitoring.custom_logging import getLogger

logger = getLogger("CPAC.cwas.tests")
basicConfig(format="%(message)s", level=INFO)


@pytest.mark.skip(reason="requires RegressionTester")
def test_adhd04():
"""Test CWAS with ADHD04 data."""
rt = RegressionTester("adhd04", "diagnosis", "diagnosis")
rt.run()


@pytest.mark.skip(reason="requires RegressionTester")
def test_adhd40():
"""Test CWAS with ADHD40 data."""
rt = RegressionTester("adhd40", "diagnosis", "diagnosis + age + sex + meanFD")
rt.run()


@pytest.mark.skip(reason="requires specific local directory")
class RegressionTester(object):
"""
"""Test the CWAS pipeline in Python and R, and compare the results.
tmp = RegressionTester('adhd04', 'diagnosis', 'diagnosis')
tmp.run().
"""
Expand All @@ -36,13 +63,18 @@ def __init__(
self.formula = formula

def run(self):
"""Run the CWAS pipeline in Python and R, and compare the results."""
logger.info("Python-Based CWAS")
self.run_cwas()

logger.info("R-Based CWAS")
self.run_connectir()

logger.info("Compare Python and R")
self.compare_py_vs_r()

def run_cwas(self):
"""Run the CWAS pipeline in Python."""
import os

os.chdir("%s/C-PAC" % self.base)
Expand Down Expand Up @@ -80,10 +112,8 @@ def run_cwas(self):
###

# Read in list of subject functionals
subjects_list = [
l.strip().strip('"')
for l in open(sfile).readlines() # pylint: disable=consider-using-with
]
with open(sfile) as _f:
subjects_list = [l.strip().strip('"') for l in _f.readlines()] # noqa: E741

# Read in design/regressor file
regressors = np.loadtxt(rfile)
Expand Down Expand Up @@ -111,21 +141,23 @@ def run_cwas(self):
# pass

# Run it!
time.clock()
start = time.clock()
plugin_args = {"n_procs": 4}
c.run(plugin=MultiProcPlugin(plugin_args), plugin_args=plugin_args)
time.clock()
end = time.clock()
logger.info("time: %.2gs", end - start)

def run_connectir(self):
"""
This runs distances and MDMR with connectir.
"""Distances and MDMR with connectir.
This should be run after run_cwas().
"""
import os
import time

time.clock()
start = time.clock()

logger.info("Subject Distances")
cmd = (
"connectir_subdist.R --infuncs1 %(basedir)s/configs/"
"%(outbase)s_funcpaths_4mm.txt --brainmask1 %(basedir)s/"
Expand All @@ -134,8 +166,10 @@ def run_connectir(self):
"--forks 1 --threads 12 %(basedir)s/results_%(outbase)s.r"
% {"basedir": self.base, "outbase": self.name}
)
logger.info("RUNNING: %s", cmd)
os.system(cmd)

logger.info("MDMR")
cmd = (
"connectir_mdmr.R --indir %(basedir)s/results_%(outbase)s.r "
"--formula '%(formula)s' --model "
Expand All @@ -150,13 +184,15 @@ def run_connectir(self):
"nperms": 1000,
}
)
logger.info("RUNNING: %s", cmd)
os.system(cmd)

time.clock()
end = time.clock()
logger.info("time: %.2gs", end - start)

@pytest.mark.skip(reason="No R installation in C-PAC image")
def compare_py_vs_r(self):
"""This will compare the output from the CPAC python vs the R connectir."""
"""Compare the output from the CPAC python vs the R connectir."""
import os

os.chdir("%s/C-PAC" % self.base)
Expand Down Expand Up @@ -239,18 +275,20 @@ def compare_py_vs_r(self):
comp = np.allclose(py_hat, r_hat)
assert_that(comp, "regressors as hat matrices")

comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99
comp = np.corrcoef(py_fs, r_fs[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004
assert_that(comp, "Fstats similarity")

comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99
comp = np.corrcoef(py_ps, r_ps[inds_r2py])[0, 1] > 0.99 # noqa: PLR2004
assert_that(comp, "p-values similarity ")

comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01
comp = abs(py_fs - r_fs[inds_r2py]).mean() < 0.01 # noqa: PLR2004
assert_that(comp, "Fstats difference")

comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05
comp = abs(py_ps - r_ps[inds_r2py]).mean() < 0.05 # noqa: PLR2004
assert_that(comp, "p-values difference")

logger.info("tests were all good")


def test_cwas_connectir():
# add the code to run the same cwas with connectir
Expand Down Expand Up @@ -302,15 +340,18 @@ def test_distances():
mask_file = op.join(sdir, "mask.nii.gz")

sfile = "/home2/data/Projects/CWAS/share/nki/subinfo/40_Set1_N104/short_compcor_funcpaths_4mm_smoothed.txt"
subjects_list = [l.strip().strip('"') for l in open(sfile).readlines()]
subjects_list = [
l.strip().strip('"')
for l in open(sfile).readlines() # noqa: E741
]
subjects_list = subjects_list[:n]
subjects_file_list = subjects_list

mask = nb.load(mask_file).get_fdata().astype("bool")
mask = nib.load(mask_file).get_fdata().astype("bool")
mask_indices = np.where(mask)

subjects_data = [
nb.load(subject_file).get_fdata().astype("float64")[mask_indices].T
nib.load(subject_file).get_fdata().astype("float64")[mask_indices].T
for subject_file in subjects_file_list
]
subjects_data = np.array(subjects_data)
Expand Down
28 changes: 26 additions & 2 deletions CPAC/cwas/tests/test_pipeline_cwas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
# Copyright (C) 2021-2024 C-PAC Developers

# This file is part of C-PAC.

# C-PAC is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.

# C-PAC is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
"""Test the CWAS pipeline."""
from logging import basicConfig, INFO
import os
from urllib.error import URLError

Expand All @@ -8,18 +26,24 @@
import nilearn.datasets

from CPAC.cwas.pipeline import create_cwas
from CPAC.utils.monitoring.custom_logging import getLogger

logger = getLogger("CPAC.cwas.tests")
basicConfig(format="%(message)s", level=INFO)


@pytest.mark.parametrize("z_score", [[0], [1], [0, 1], []])
def test_pipeline(z_score):
"""Test the CWAS pipeline with z-score forking options."""
try:
# pylint: disable=invalid-name
cc = nilearn.datasets.fetch_atlas_craddock_2012()
except URLError:
logger.info("Could not fetch atlas, skipping test")
return
try:
os.mkdir("/tmp/cwas")
except:
except: # noqa: E722
pass

abide_data = nilearn.datasets.fetch_abide_pcp(n_subjects=10)
Expand All @@ -34,7 +58,7 @@ def test_pipeline(z_score):
assert all(FID in images[i] for i, FID in enumerate(pheno.FILE_ID))
img = nib.load(cc["scorr_mean"])
img_data = np.copy(img.get_fdata()[:, :, :, 10])
img_data[img_data != 2] = 0.0
img_data[img_data != 2] = 0.0 # noqa: PLR2004
img = nib.Nifti1Image(img_data, img.affine)
nib.save(img, "/tmp/cwas/roi.nii.gz")

Expand Down

0 comments on commit 4650f4f

Please sign in to comment.