diff --git a/scilpy/connectivity/connectivity.py b/scilpy/connectivity/connectivity.py index dd6b75901..65f21c692 100644 --- a/scilpy/connectivity/connectivity.py +++ b/scilpy/connectivity/connectivity.py @@ -120,7 +120,7 @@ def compute_triu_connectivity_from_labels(tractogram, data_labels, return matrix, ordered_labels, start_labels, end_labels -def load_node_nifti(directory, in_label, out_label, ref_img): +def _load_node_nifti(directory, in_label, out_label, ref_img): in_filename = os.path.join(directory, '{}_{}.nii.gz'.format(in_label, out_label)) @@ -255,8 +255,8 @@ def compute_connectivity_matrices_from_hdf5( measures_to_return['streamline_count'] = len(streamlines) if similarity_directory is not None: - density_sim = load_node_nifti(similarity_directory, - in_label, out_label, labels_img) + density_sim = _load_node_nifti(similarity_directory, + in_label, out_label, labels_img) if density_sim is None: ba_vox = 0 else: diff --git a/scilpy/connectivity/tests/test_connectivity.py b/scilpy/connectivity/tests/test_connectivity.py new file mode 100644 index 000000000..384084e7d --- /dev/null +++ b/scilpy/connectivity/tests/test_connectivity.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +import numpy as np + +from scilpy.connectivity.connectivity import \ + compute_triu_connectivity_from_labels + + +def test_compute_triu_connectivity_from_labels(): + labels = np.asarray([[3, 4, 5, 6], + [7, 8, 9, 10]]) + + # streamline 1 starts at label 4, ends at label 6 + # streamline 2 starts at label 9, ends at label 7 + # streamline 3 too, but not in the center (vox, corner) + tractogram = [np.asarray([[0, 1], + [5, 6], + [0, 3]]), + np.asarray([[1, 2], + [1, 0]]), + np.asarray([[1.1, 2.2], + [1.9, 0.5]])] + output, _, _, _ = compute_triu_connectivity_from_labels( + tractogram, labels) + assert np.array_equal(output.shape, [8, 8]) + expected_out = np.zeros((8, 8)) + expected_out[1, 3] = 1 # This is labels (4, 6) + expected_out[4, 6] = 2 # This is labels (7, 9) + assert np.array_equal(output, expected_out) + + +def test_compute_connectivity_matrices_from_hdf5(): + pass + + diff --git a/scilpy/connectivity/tests/test_connectivity_tools.py b/scilpy/connectivity/tests/test_connectivity_tools.py deleted file mode 100644 index d89ad462e..000000000 --- a/scilpy/connectivity/tests/test_connectivity_tools.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- - - -def test_compute_olo(): - pass - - -def test_apply_olo(): - pass - - -def test_parse_ordering(): - pass - - -def test_apply_reordering(): - pass - - -def test_evaluate_graph_measures(): - pass - - -def test_normalize_matrix_from_values(): - pass - - -def test_normalize_matrix_from_parcel(): - pass diff --git a/scilpy/connectivity/tests/test_matrix_tools.py b/scilpy/connectivity/tests/test_matrix_tools.py new file mode 100644 index 000000000..02b6b265d --- /dev/null +++ b/scilpy/connectivity/tests/test_matrix_tools.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +import numpy as np + +from scilpy.connectivity.matrix_tools import apply_reordering + + +def test_compute_olo(): + pass + + +def test_apply_olo(): + pass + + +def test_apply_reordering(): + conn_matrix = np.asarray([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]) + output = apply_reordering(conn_matrix, [[0, 1, 3, 2], + [1, 2, 3, 0]]) + # First changing rows 2 and 3 + expected_out = np.asarray([[1, 2, 3, 4], + [5, 6, 7, 8], + [13, 14, 15, 16], + [9, 10, 11, 12]]) + # Permuting columns + expected_out = np.asarray([[2, 3, 4, 1], + [6, 7, 8, 5], + [14, 15, 16, 13], + [10, 11, 
12, 9]]) + assert np.array_equal(output, expected_out) + + +def test_evaluate_graph_measures(): + pass + + +def test_normalize_matrix_from_values(): + pass + + +def test_normalize_matrix_from_parcel(): + pass diff --git a/scilpy/stats/matrix_stats.py b/scilpy/stats/matrix_stats.py index d77b5e1fc..fe15d5dbd 100644 --- a/scilpy/stats/matrix_stats.py +++ b/scilpy/stats/matrix_stats.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import itertools import logging import bct @@ -7,6 +8,8 @@ from scipy.stats import t as stats_t from statsmodels.stats.multitest import multipletests +from scilpy.tractanalysis.reproducibility_measures import compute_dice_voxel + def _ttest_stat_only(x, y, tail): t = np.mean(x) - np.mean(y) @@ -180,3 +183,59 @@ def omega_sigma(matrix): (path_length / path_length_rand) return float(omega), float(sigma) + + +def pairwise_agreement(matrices, ref_matrix=None, normalize=False): + """ + The similarity measures will be computed for each pair. Alternatively, you + can compare all matrices to a single reference, ref_matrix. + + Parameters + ---------- + matrices: list[np.ndarray] + Input matrices + ref_matrix: Optional[np.ndarray] + Optional reference matrix. + normalize: bool + If true, will normalize all matrices from zero to one. + + Returns + ------- + output_measures_dict: dict + A dict with list of values for each pair of matrices: + { + 'RMSE': root-mean-square error + 'correlation': correlation + 'w_dice_voxels': weighted dice, agreement of the values. + 'dice_voxels': agreement of the binarized matrices + } + """ + def _prepare_matrix(tmp_mat): + # Removing the min now simplifies computations + tmp_mat -= np.min(tmp_mat) + if normalize: + return tmp_mat / np.max(tmp_mat) + return tmp_mat + + matrices = [_prepare_matrix(m) for m in matrices] + if ref_matrix is not None: + ref_matrix = _prepare_matrix(ref_matrix) + + output_measures_dict = {'RMSE': [], 'correlation': [], + 'w_dice_voxels': [], 'dice_voxels': []} + + if ref_matrix is not None: + pairs = list(itertools.product(matrices, [ref_matrix])) + else: + pairs = list(itertools.combinations(matrices, r=2)) + + for i in pairs: + rmse = np.sqrt(np.mean((i[0] - i[1]) ** 2)) + output_measures_dict['RMSE'].append(rmse) + corrcoef = np.corrcoef(i[0].ravel(), i[1].ravel()) + output_measures_dict['correlation'].append(corrcoef[0][1]) + dice, w_dice = compute_dice_voxel(i[0], i[1]) + output_measures_dict['dice_voxels'].append(dice) + output_measures_dict['w_dice_voxels'].append(w_dice) + + return output_measures_dict diff --git a/scripts/scil_connectivity_compare_populations.py b/scripts/scil_connectivity_compare_populations.py index 74a9bcced..1a5b561ec 100755 --- a/scripts/scil_connectivity_compare_populations.py +++ b/scripts/scil_connectivity_compare_populations.py @@ -2,21 +2,31 @@ # -*- coding: utf-8 -*- """ -Performs a network-based statistical comparison for populations g1 and g2. The -output is a matrix of the same size as the input connectivity matrices, with -p-values at each edge. -All input matrices must have the same shape (NxN). For paired t-test, both -groups must have the same number of observations. +Performs a statistical comparison between connectivity matrices for populations +g1 and g2, using a t-test. + +The inputs are any connectivity matrix, that can be obtained with +scil_connectivity_compute_matrices.py, used separately on the two populations. +All input matrices must have the same shape (NxN). 
+ +The output is a matrix of the same size as the input connectivity matrices, +with p-values at each connection (edge). For example, if you have streamline count weighted matrices for a MCI and a -control group and you want to investiguate differences in their connectomes: +control group, and you want to investiguate differences in their connectomes: >>> scil_connectivity_compare_populations.py pval.npy --g1 MCI/*_sc.npy --g2 CTL/*_sc.npy +Options: + --filtering_mask will simply multiply the binary mask to all input matrices before performing the statistical comparison. Reduces the number of statistical tests, useful when using --fdr or --bonferroni. +--paired will use a paired t-test. Then both groups must have the same number +of observations (subjects). They must be listed in the right order using --g1 +and --g2. + Formerly: scil_compare_connectivity.py """ @@ -53,9 +63,11 @@ def _build_arg_parser(): help='Output matrix (.npy) containing the edges p-value.') p.add_argument('--in_g1', nargs='+', required=True, - help='List of matrices for the first population (.npy).') + help='List of matrices for each subject in the first ' + 'population (.npy).\n') p.add_argument('--in_g2', nargs='+', required=True, - help='List of matrices for the second population (.npy).') + help='List of matrices for each subject in the second ' + 'population (.npy).') p.add_argument('--tail', choices=['left', 'right', 'both'], default='both', help='Enables specification of an alternative hypothesis:\n' 'left: mean of g1 < mean of g2,\n' @@ -94,8 +106,7 @@ def main(): args = parser.parse_args() logging.getLogger().setLevel(logging.getLevelName(args.verbose)) - assert_inputs_exist(parser, args.in_g1+args.in_g2, - args.filtering_mask) + assert_inputs_exist(parser, args.in_g1+args.in_g2, args.filtering_mask) assert_outputs_exist(parser, args, args.out_pval_matrix) if args.filtering_mask: diff --git a/scripts/scil_connectivity_compute_matrices.py b/scripts/scil_connectivity_compute_matrices.py index f28a1730b..bbd66644a 100755 --- a/scripts/scil_connectivity_compute_matrices.py +++ b/scripts/scil_connectivity_compute_matrices.py @@ -33,6 +33,8 @@ - Streamline count. - Length: mean streamline length (mm). + Note that this matrix, as well as the volume-weighted, can be used to + normalize a streamline count matrix in scil_connectivity_normalize. - Volume-weighted: Volume of the bundle. - Similarity: mean density. Uses pre-computed density maps, which can be obtained with @@ -54,6 +56,15 @@ - Mean DPS: Mean values in the data_per_streamline of each streamline in the bundles. +What next? +========== +See our other scripts to help you achieve your goals: + - Normalize a streamline-count matrix based on other matrices using + scil_connectivity_normalize. + - Compute a t-test between two groups of subjects using + scil_connectivity_compare_populations. + - See all our scripts starting with scil_connectivity_ for more ideas! + Formerly: scil_compute_connectivity.py """ diff --git a/scripts/scil_connectivity_compute_pca.py b/scripts/scil_connectivity_compute_pca.py index 8a19d7a5f..670c82ddc 100755 --- a/scripts/scil_connectivity_compute_pca.py +++ b/scripts/scil_connectivity_compute_pca.py @@ -2,23 +2,38 @@ # -*- coding: utf-8 -*- """ -Script to compute PCA analysis on diffusion metrics. Output returned is all -significant principal components (e.g. presenting eigenvalues > 1) in a -connectivity matrix format. 
This script can take into account all edges from -every subject in a population or only non-zero edges across all subjects. +Script to compute PCA analysis on a set of connectivity matrices. The output is +all significant principal components in a connectivity matrix format. +This script can take into account all edges from every subject in a population +or only non-zero edges across all subjects. + +Interpretation of resulting principal components can be done by evaluating the +loadings values for each metrics. A value near 0 means that this metric doesn't +contribute to this specific component whereas high positive or negative values +mean a larger contribution. Components can then be labeled based on which +metric contributes the highest. For example, a principal component showing a +high loading for afd_fixel and near 0 loading for all other metrics can be +interpreted as axonal density (see Gagnon et al. 2022 for this specific example +or ref [3] for an introduction to PCA). The script can take directly as input a connectoflow output folder. Simply use -the --input_connectoflow flag. For other type of folder input, the script -expects a single folder containing all matrices for all subjects. -Example: +the --input_connectoflow flag. Else, the script expects a single folder +containing all matrices for all subjects. Those matrices can be obtained, for +instance, by scil_connectivity_compute_matrices.py. +Example: Default input [in_folder] |--- sub-01_ad.npy |--- sub-01_md.npy |--- sub-02_ad.npy |--- sub-02_md.npy |--- ... +Connectoflow input: + [in_folder] + [subj-01] + [Compute_Connectivity] + |--- ad.npy -The plots, tables and principal components matrices will be outputted in the +The plots, tables and principal components matrices will be saved in the designated folder from the argument. If you want to move back your principal components matrices in your connectoflow output, you can use a similar bash command for all principal components: @@ -27,23 +42,14 @@ cp out_folder/${sub}_PC1.npy connectoflow_output/$sub/Compute_Connectivity/ done -Interpretation of resulting principal components can be done by evaluating the -loadings values for each metrics. A value near 0 means that this metric doesn't -contribute to this specific component whereas high positive or negative values -mean a larger contribution. Components can then be labeled based on which -metric contributes the highest. For example, a principal component showing a -high loading for afd_fixel and near 0 loading for all other metrics can be -interpreted as axonal density (see Gagnon et al. 2022 for this specific example -or ref [3] for an introduction to PCA). - EXAMPLE USAGE: scil_connectivity_compute_pca.py input_folder/ output_folder/ --metrics ad fa md rd [...] --list_ids list_ids.txt """ -# Import required libraries. 
import argparse import logging +import os import matplotlib.pyplot as plt import numpy as np @@ -55,7 +61,8 @@ save_matrix_in_any_format, add_verbose_arg, add_overwrite_arg, - assert_output_dirs_exist_and_empty) + assert_output_dirs_exist_and_empty, + assert_inputs_dirs_exist, assert_inputs_exist) EPILOG = """ @@ -91,12 +98,15 @@ def _build_arg_parser(): 'followed by the .npy extension.') p.add_argument('--list_ids', required=True, metavar='FILE', help='Path to a .txt file containing a list of all ids.') - p.add_argument('--not_only_common', action='store_true', + p.add_argument('--all_edges', action='store_true', help='If true, will include all edges from all subjects ' 'and not only \ncommon edges (Not recommended)') p.add_argument('--input_connectoflow', action='store_true', help='If true, script will assume the input folder is a ' 'Connectoflow output.') + p.add_argument('--show', action='store_true', + help="If set, show matplotlib figures. Else, they are " + "only saved in the output folder.") add_verbose_arg(p) add_overwrite_arg(p) @@ -104,41 +114,72 @@ def _build_arg_parser(): return p -def generate_pca_input(dictionary): - """ - Function to create PCA input from matrix. - :param dictionary: Dictionary with metrics as keys containing list of matrices sorted by ids. - :return: Numpy array. - """ - for metric in dictionary.keys(): - mat = np.rollaxis(np.array(dictionary[metric]), axis=1, start=3) - matrix_shape = mat.shape[1:3] - n_group = mat.shape[0] - mat = mat.reshape((np.prod(matrix_shape) * n_group, 1)) - dictionary[metric] = mat +def plot_eigenvalues(pca, principaldf, nb_metrics, out_file): + # Plot the eigenvalues. + logging.info('Plotting results...') + eigenvalues = pca.explained_variance_ + pos = list(range(1, nb_metrics + 1)) - return np.hstack([dictionary[i] for i in dictionary.keys()]) + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + bar_eig = ax.bar(pos, eigenvalues, align='center', + tick_label=principaldf.columns) + ax.set_xlabel('Principal Components', fontsize=10) + ax.set_ylabel('Eigenvalues', fontsize=10) + ax.set_title('Eigenvalues for each principal components.', fontsize=10) + ax.margins(0.1) + autolabel(bar_eig, ax) + plt.savefig(out_file) + return eigenvalues -def restore_zero_values(orig, new): - """ - Function to restore 0 values in a numpy array. - :param orig: Original numpy array containing 0 values to restore. - :param new: Array in which 0 values need to be restored. - :return: Numpy array with data from the new array but zeros from the original array. - """ - mask = np.copy(orig) - mask[mask != 0] = 1 +def plot_explained_variance(pca, principaldf, nb_metrics, out_file): + # Plot the explained variance. + explained_var = pca.explained_variance_ratio_ + pos = list(range(1, nb_metrics + 1)) + + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + bar_var = ax.bar(pos, explained_var, align='center', + tick_label=principaldf.columns) + ax.set_xlabel('Principal Components', fontsize=10) + ax.set_ylabel('Explained variance', fontsize=10) + ax.set_title('Amount of explained variance for all principal components.', + fontsize=10) + ax.margins(0.1) + autolabel(bar_var, ax) + plt.savefig(out_file) - return np.multiply(new, mask) + +def plot_contribution(pca, principaldf, metrics, out_excel, out_image): + # Plot the contribution of each measures to principal component. 
+ component = pca.components_ + output_component = pd.DataFrame(component, index=principaldf.columns, + columns=metrics) + output_component.to_excel(out_excel, index=True, header=True) + + fig, axs = plt.subplots(2) + fig.suptitle('Graph of the contribution of each measures to the first ' + 'and second principal component.', fontsize=10) + pos = list(range(1, len(metrics) + 1)) + bar_pc1 = axs[0].bar(pos, component[0], align='center', + tick_label=metrics) + bar_pc2 = axs[1].bar(pos, component[1], align='center', + tick_label=metrics) + axs[0].margins(0.2) + axs[1].margins(0.2) + autolabel(bar_pc1, axs[0]) + autolabel(bar_pc2, axs[1]) + for ax in axs.flat: + ax.set(xlabel='Diffusion measures', ylabel='Loadings') + for ax in axs.flat: + ax.label_outer() + plt.savefig(out_image) def autolabel(rects, axs): """ Attach a text label above each bar displaying its height (or bar value). - :param rects: Graphical object. - :param axs: Axe number. - :return: """ for rect in rects: height = rect.get_height() @@ -150,179 +191,128 @@ def autolabel(rects, axs): '%.3f' % float(height), ha='center', va='bottom') -def extracting_common_cnx(dictionary, ind): - """ - Function to create a binary mask representing common connections across the population - containing non-zero values. - :param dictionary: Dictionary with metrics as keys containing list of matrices sorted by ids. - :param ind: Indice of the key to use to generate the binary mask from the dictionary. - :return: Binary mask. - """ - keys = list(dictionary.keys()) - met = np.copy(dictionary[keys[ind]]) - - # Replacing all non-zero values by 1. - for i in range(0, len(met)): - met[i][met[i] != 0] = 1 - - # Adding all matrices together. - orig = np.copy(met[0]) - mask = [orig] - for i in range(1, len(met)): - orig = np.add(orig, met[i]) - mask.append(orig) - - # Converting resulting values to 0 and 1. - mask_f = mask[(len(met) - 1)] - mask_f[mask_f != len(met)] = 0 - mask_f[mask_f == len(met)] = 1 - - return mask_f - - -def apply_binary_mask(dictionary, mask): - """ - Function to apply a binary mask to all matrices contained in a dictionary. - :param dictionary: Dictionary with metrics as keys containing list of matrices sorted by ids. - :param mask: Binary mask. - :return: Dictionary with the same shape as input. - """ - for a in dictionary.keys(): - for i in range(0, len(dictionary[a])): - dictionary[a][i] = np.multiply(dictionary[a][i], mask) - - return dictionary - - def main(): parser = _build_arg_parser() args = parser.parse_args() logging.getLogger().setLevel(logging.getLevelName(args.verbose)) - assert_output_dirs_exist_and_empty(parser, args, args.out_folder, create_dir=True) + assert_inputs_dirs_exist(parser, args.in_folder) + assert_inputs_exist(parser, args.list_ids) + assert_output_dirs_exist_and_empty(parser, args, args.out_folder, + create_dir=True) + out_eigenvalues = os.path.join(args.out_folder, 'eigenvalues.pdf') + out_variance = os.path.join(args.out_folder, 'explained_variance.pdf') + out_excel = os.path.join(args.out_folder, 'loadings.xlsx') + out_contributions = os.path.join(args.out_folder, 'contribution.pdf') with open(args.list_ids) as f: subjects = f.read().split() + # Loading all matrices. if args.input_connectoflow: - # Loading all matrix. 
logging.info('Loading all matrices from a Connectoflow output...')
-        dictionary = {m: [load_matrix_in_any_format(f'{args.in_folder}/{a}/Compute_Connectivity/{m}.npy')
-                          for a in subjects]
-                      for m in args.metrics}
+        files_per_metric = [[os.path.join(
+            args.in_folder, a, 'Compute_Connectivity', '{}.npy'.format(m))
+            for a in subjects]
+            for m in args.metrics]
     else:
         logging.info('Loading all matrices...')
-        dictionary = {m: [load_matrix_in_any_format(f'{args.in_folder}/{a}_{m}.npy') for a in subjects]
-                      for m in args.metrics}
-    # Assert that all metrics have the same number of subjects.
-    nb_sub = [len(dictionary[m]) for m in args.metrics]
-    assert all(x == len(subjects) for x in nb_sub), "Error, the number of matrices for each metric doesn't match" \
-                                                    "the number of subject in the id list." \
-                                                    "Please validate input folder."
+        files_per_metric = [[os.path.join(
+            args.in_folder, '{}_{}.npy'.format(a, m))
+            for a in subjects]
+            for m in args.metrics]
+
+    assert_inputs_exist(parser, sum(files_per_metric, []))
+    matrices_per_metric = [[load_matrix_in_any_format(f) for f in files]
+                           for files in files_per_metric]
 
     # Setting individual matrix shape.
-    mat_shape = dictionary[args.metrics[0]][0].shape
+    mat_shape = matrices_per_metric[0][0].shape
 
-    if args.not_only_common:
-        # Creating input structure using all edges from all subjects.
+    # Creating input structure
+    if args.all_edges:
         logging.info('Creating PCA input structure with all edges...')
-        df = generate_pca_input(dictionary)
     else:
-        m1 = extracting_common_cnx(dictionary, 0)
-        m2 = extracting_common_cnx(dictionary, 1)
-
-        if m1.shape != mat_shape:
-            parser.error("Extracted binary mask doesn't match the shape of individual input matrix.")
-
-        if np.sum(m1) != np.sum(m2):
-            parser.error("Different binary mask have been generated from 2 different metrics, \n "
-                         "please validate input data.")
-        else:
-            logging.info('Data shows {} common connections across the population.'.format(np.sum(m1)))
-
-        dictionary = apply_binary_mask(dictionary, m1)
-
-        # Creating input structure.
         logging.info('Creating PCA input structure with common edges...')
-        df = generate_pca_input(dictionary)
-
-    # Setting 0 values to nan.
-    df[df == 0] = 'nan'
-
-    # Standardized the data.
+        nb_subjects = len(matrices_per_metric[0])
+
+        # Using the first metric
+        subj_masks = [m != 0 for m in matrices_per_metric[0]]  # Mask per subj
+        common_edges_mask = np.sum(subj_masks, axis=0) == nb_subjects
+
+        # Verifying that other metrics have the same common edges
+        for i, matrices in enumerate(matrices_per_metric[1:]):
+            tmp_subj_masks = [m != 0 for m in matrices]  # Binary per subj
+            tmp_common = np.sum(tmp_subj_masks, axis=0) == nb_subjects
+            if not np.array_equal(common_edges_mask, tmp_common):
+                parser.error("Different binary masks (common edge) have been "
+                             "generated from metric {} than other metrics. "
+                             "Please validate your input data."
+                             .format(args.metrics[i]))
+
+        plt.figure()
+        plt.imshow(common_edges_mask)
+        plt.title('Common edges mask')
+
+        logging.info('Data shows {} common connections across the population.'
+                     .format(np.sum(common_edges_mask)))
+
+        # Apply mask
+        matrices_per_metric = [[m * common_edges_mask
+                                for m in metric_matrices]
+                               for metric_matrices in matrices_per_metric]
+
+    # Creating input structure.
+    # For each metric, combining all subjects together, flattening all
+    # connectivity matrices. 
+ flat_mats = [] + for metric_matrices in matrices_per_metric: + mat = np.rollaxis(np.array(metric_matrices), axis=1, start=3) + mat = mat.reshape((np.prod(mat.shape), 1)) + flat_mats.append(mat) + df = np.hstack(flat_mats) # Shape: nb_metrics x (flattened data) + df[df == 0] = np.nan # Setting 0 values to nan. + + # Standardizing the data. logging.info('Standardizing data...') x = StandardScaler().fit_transform(df) df_pca = x[~np.isnan(x).any(axis=1)] x = np.nan_to_num(x, nan=0.) - # Perform the PCA. + # Performing the PCA. logging.info('Performing PCA...') pca = PCA(n_components=len(args.metrics)) principalcomponents = pca.fit_transform(df_pca) - principaldf = pd.DataFrame(data=principalcomponents, columns=[f'PC{i}' for i in range(1, len(args.metrics) + 1)]) - - # Plot the eigenvalues. - logging.info('Plotting results...') - eigenvalues = pca.explained_variance_ - pos = list(range(1, len(args.metrics)+1)) - plt.clf() - plt.cla() - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) - bar_eig = ax.bar(pos, eigenvalues, align='center', tick_label=principaldf.columns) - ax.set_xlabel('Principal Components', fontsize=10) - ax.set_ylabel('Eigenvalues', fontsize=10) - ax.set_title('Eigenvalues for each principal components.', fontsize=10) - ax.margins(0.1) - autolabel(bar_eig, ax) - plt.savefig(f'{args.out_folder}/eigenvalues.pdf') - - # Plot the explained variance. - explained_var = pca.explained_variance_ratio_ - plt.clf() - plt.cla() - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) - bar_var = ax.bar(pos, explained_var, align='center', tick_label=principaldf.columns) - ax.set_xlabel('Principal Components', fontsize=10) - ax.set_ylabel('Explained variance', fontsize=10) - ax.set_title('Amount of explained variance for all principal components.', fontsize=10) - ax.margins(0.1) - autolabel(bar_var, ax) - plt.savefig(f'{args.out_folder}/explained_variance.pdf') - - # Plot the contribution of each measures to principal component. - component = pca.components_ - output_component = pd.DataFrame(component, index=principaldf.columns, columns=args.metrics) - output_component.to_excel(f'{args.out_folder}/loadings.xlsx', index=True, header=True) - plt.clf() - plt.cla() - fig, axs = plt.subplots(2) - fig.suptitle('Graph of the contribution of each measures to the first and second principal component.', fontsize=10) - bar_pc1 = axs[0].bar(pos, component[0], align='center', tick_label=args.metrics) - bar_pc2 = axs[1].bar(pos, component[1], align='center', tick_label=args.metrics) - axs[0].margins(0.2) - axs[1].margins(0.2) - autolabel(bar_pc1, axs[0]) - autolabel(bar_pc2, axs[1]) - for ax in axs.flat: - ax.set(xlabel='Diffusion measures', ylabel='Loadings') - for ax in axs.flat: - ax.label_outer() - plt.savefig(f'{args.out_folder}/contribution.pdf') + principaldf = pd.DataFrame( + data=principalcomponents, + columns=[f'PC{i}' for i in range(1, len(args.metrics) + 1)]) + + # Plotting + eigenvalues = plot_eigenvalues(pca, principaldf, + nb_metrics=len(args.metrics), + out_file=out_eigenvalues) + plot_explained_variance(pca, principaldf, nb_metrics=len(args.metrics), + out_file=out_variance) + plot_contribution(pca, principaldf, args.metrics, out_excel, + out_contributions) # Extract the derived newly computed measures from the PCA analysis. 
logging.info('Saving matrices for PC with eigenvalues > 1...') out = pca.transform(x) - out = restore_zero_values(x, out) - out = out.swapaxes(0, 1).reshape(len(args.metrics), len(subjects), mat_shape[0], mat_shape[1]) + out = out * (x != 0) + out = out.swapaxes(0, 1).reshape(len(args.metrics), len(subjects), + mat_shape[0], mat_shape[1]) # Save matrix for significant components nb_pc = eigenvalues[eigenvalues >= 1] for i in range(0, len(nb_pc)): for s in range(0, len(subjects)): - save_matrix_in_any_format(f'{args.out_folder}/{subjects[s]}_PC{i+1}.npy', - out[i, s, :, :]) + filename = os.path.join(args.out_folder, + f'{subjects[s]}_PC{i+1}.npy') + save_matrix_in_any_format(filename, out[i, s, :, :]) + + if args.show: + plt.show() if __name__ == "__main__": diff --git a/scripts/scil_connectivity_filter.py b/scripts/scil_connectivity_filter.py index 024501402..73d7a6ee5 100755 --- a/scripts/scil_connectivity_filter.py +++ b/scripts/scil_connectivity_filter.py @@ -9,7 +9,7 @@ Can be used with any connectivity matrix from scil_connectivity_compute_matrices.py. -For example, a simple filtering (Jasmeen style) would be: +For example, a filtering as performed in [1] would be: scil_connectivity_filter.py out_mask.npy --greater_than */sc.npy 1 0.90 --lower_than */sim.npy 2 0.90 @@ -17,10 +17,10 @@ This will result in a binary mask where each node with a value of 1 represents a node with at least 90% of the population having at least 1 streamline, -90% of the population is similar to the average (2mm) and 90% of the +90% of the population being similar to the average (2mm) and 90% of the population having at least 40mm of average streamlines length. -All operation are stricly > or <, there is no >= or <=. +All operations are stricly > or <, there is no >= or <=. --greater_than or --lower_than expect the same convention: MATRICES_LIST VALUE_THR POPULATION_PERC @@ -28,10 +28,15 @@ connectivity matrices is used for each condition. This script performs an intersection of all conditions, meaning that all -conditions must be met in order not to be filtered. +conditions must be met in order to not be filtered. + If the user wants to manually handle the requirements, --keep_condition_count can be used and manually binarized using scil_connectivity_math.py +[1] Sidhu, J. (2022). Inter-lobar Connectivity of the Frontal Lobe Association +Tracts Consistently Observed in 105 Healthy Adults +(Doctoral dissertation, Université de Sherbrooke). 
+
 Formerly: scil_filter_connectivity.py
 """
 
@@ -44,7 +49,7 @@ from scilpy.io.utils import (add_overwrite_arg,
                              add_verbose_arg,
                              assert_outputs_exist,
                              load_matrix_in_any_format,
-                             save_matrix_in_any_format)
+                             save_matrix_in_any_format, assert_inputs_exist)
 
 
 def _build_arg_parser():
@@ -57,10 +62,12 @@
                    'conditions (.npy).')
 
     p.add_argument('--lower_than', nargs='*', action='append',
+                   metavar='MATRICES_LIST VALUE_THR POPULATION_PERC',
                    help='Lower than condition using the VALUE_THR in '
                         'at least POPULATION_PERC (from MATRICES_LIST).\n'
                         'See description for more details.')
     p.add_argument('--greater_than', nargs='*', action='append',
+                   metavar='MATRICES_LIST VALUE_THR POPULATION_PERC',
                    help='Greater than condition using the VALUE_THR in '
                         'at least POPULATION_PERC (from MATRICES_LIST).\n'
                         'See description for more details.')
@@ -78,11 +85,37 @@ def _build_arg_parser():
     return p
 
 
+def _filter(input_list, condition):
+    matrices = [load_matrix_in_any_format(i) for i in input_list[:-2]]
+    shape = matrices[0].shape
+    matrices = np.rollaxis(np.array(matrices), axis=0, start=3)
+    value_threshold = float(input_list[-2])
+    population_threshold = int(float(input_list[-1]) * matrices.shape[-1])
+
+    empty_matrices = np.zeros(matrices.shape)
+    # Only difference between both conditions; the rest is identical
+    if condition == 'lower':
+        empty_matrices[matrices < value_threshold] = 1
+    else:
+        empty_matrices[matrices > value_threshold] = 1
+    population_score = np.sum(empty_matrices, axis=2)
+
+    filter_mask = population_score > population_threshold
+    logging.info('Condition {}_than resulted in {} filtered '
+                 'elements out of {}.'.format(
+                     condition, np.count_nonzero(~filter_mask),
+                     np.prod(shape)))
+
+    return filter_mask
+
+
 def main():
     parser = _build_arg_parser()
     args = parser.parse_args()
     logging.getLogger().setLevel(logging.getLevelName(args.verbose))
 
+    inputs = [m for cond in (args.lower_than or []) for m in cond[:-2]]
+    inputs.extend([m for cond in (args.greater_than or []) for m in cond[:-2]])
+    assert_inputs_exist(parser, inputs)
     assert_outputs_exist(parser, args, args.out_matrix_mask)
 
     if not args.lower_than and not args.greater_than:
@@ -99,34 +132,16 @@ def main():
     condition_counter = 0
     shape = load_matrix_in_any_format(conditions_list[0][1][0]).shape
     output_mask = np.zeros(shape)
-    for input_tuple_list in conditions_list:
-        condition = input_tuple_list[0]
-        input_list = input_tuple_list[1]
+    for input_list in (args.lower_than or []):
         condition_counter += 1
-        matrices = [load_matrix_in_any_format(i) for i in input_list[:-2]]
-        matrices = np.rollaxis(np.array(matrices), axis=0, start=3)
-        value_threshold = float(input_list[-2])
-        population_threshold = int(float(input_list[-1]) * matrices.shape[-1])
-
-        empty_matrices = np.zeros(matrices.shape)
-        # Only difference between both condition, the rest is identical
-        if condition == 'lower':
-            empty_matrices[matrices < value_threshold] = 1
-        else:
-            empty_matrices[matrices > value_threshold] = 1
+        filter_mask = _filter(input_list, 'lower')
+        output_mask[filter_mask] += 1
 
-        population_score = np.sum(empty_matrices, axis=2)
-
-        logging.info('Condition {}_than (#{}) resulted in {} filtered '
-                     'elements out of {}.'.format(
-                         condition,
-                         condition_counter,
-                         len(np.where(population_score <
-                                      population_threshold)[0]),
-                         np.prod(shape)))
-
-        output_mask[population_score > population_threshold] += 1
+    for input_list in (args.greater_than or []):
+        condition_counter += 1
+        filter_mask = _filter(input_list, 'greater')
+        output_mask[filter_mask] += 1
 
     if not 
args.keep_condition_count: output_mask[output_mask < condition_counter] = 0 @@ -136,7 +151,7 @@ def main(): if args.keep_condition_count: output_mask = np.abs(output_mask - np.max(output_mask)) else: - output_mask = invert([output_mask]) + output_mask = invert([output_mask], ref_img=None) filtered_elem = np.prod(shape) - np.count_nonzero(output_mask) diff --git a/scripts/scil_connectivity_graph_measures.py b/scripts/scil_connectivity_graph_measures.py index 8c8a37e2d..2290d44ff 100755 --- a/scripts/scil_connectivity_graph_measures.py +++ b/scripts/scil_connectivity_graph_measures.py @@ -3,7 +3,7 @@ """ Evaluate graph theory measures from connectivity matrices. -A length weighted and a streamline count weighted matrix are required since +A length-weighted and a streamline count-weighted matrix are required since some measures require one or the other. This script evaluates the measures one subject at the time. To generate a @@ -13,8 +13,7 @@ ${i}/len_prob.npy hcp_prob.json --append_json --avg_node_wise; done Some measures output one value per node, the default behavior is to list -them all into a list. To obtain only the average use the ---avg_node_wise option. +them all. To obtain only the average use the --avg_node_wise option. The computed connectivity measures are: centrality, modularity, assortativity, participation, clustering, @@ -59,7 +58,7 @@ def _build_arg_parser(): help='Input connectivity matrix (.npy).\n' 'Typically a streamline count weighted matrix.') p.add_argument('in_length_matrix', - help='Input length weighted matrix (.npy).') + help='Input length-weighted matrix (.npy).') p.add_argument('out_json', help='Path of the output json.') @@ -106,7 +105,8 @@ def main(): len_matrix = load_matrix_in_any_format(args.in_length_matrix) if args.filtering_mask: - mask_matrix = load_matrix_in_any_format(args.filtering_mask) + mask_matrix = load_matrix_in_any_format( + args.filtering_mask).astype(bool) conn_matrix *= mask_matrix len_matrix *= mask_matrix diff --git a/scripts/scil_connectivity_math.py b/scripts/scil_connectivity_math.py index f4830f34a..ed8391fc1 100755 --- a/scripts/scil_connectivity_math.py +++ b/scripts/scil_connectivity_math.py @@ -94,14 +94,15 @@ def main(): if args.operation not in OPERATIONS.keys(): parser.error('Operation {} not implemented.'.format(args.operation)) + if args.operation == 'convert' and not args.data_type: + parser.error('Convert operation must be used with --data_type.') # Find at least one matrix for reference + found_ref = False for input_arg in args.in_matrices: - found_ref = False if not is_float(input_arg): - ref_data = load_matrix_in_any_format(input_arg) - ref_matrix = nib.Nifti1Image(ref_data, np.eye(4)) - mask = np.zeros(ref_data.shape) + ref_matrix = load_matrix(input_arg) + mask = np.zeros(ref_matrix.shape) found_ref = True break @@ -131,10 +132,7 @@ def main(): mask[data > 0] = 1 input_matrices.append(matrix) - if args.operation == 'convert' and not args.data_type: - parser.error('Convert operation must be used with --data_type.') - - # Perform the request operation + # Perform the requested operation try: output_data = OPERATIONS[args.operation](input_matrices, ref_matrix) except ValueError: diff --git a/scripts/scil_connectivity_normalize.py b/scripts/scil_connectivity_normalize.py index 8a3fd8a06..c18ba9654 100755 --- a/scripts/scil_connectivity_normalize.py +++ b/scripts/scil_connectivity_normalize.py @@ -4,7 +4,9 @@ """ Normalize a connectivity matrix coming from scil_tractogram_segment_connections_from_labels.py. 
-3 categories of normalization are available: + +3 categories of normalization are available, with options for each. You may +choose any number of non-mutually exclusive options: -- Edge attributes - length: Multiply each edge by the average bundle length. Compensate for far away connections when using interface seeding. @@ -31,10 +33,10 @@ - sum_to_one: Ensure the sum of all edges weight is one - log_10: Apply a base 10 logarithm to all edges weight -The volume and length matrix should come from the +The volume and length matrices should come from the scil_tractogram_segment_connections_from_labels.py script. -A review of the type of normalization is available in: +A review of the types of normalization is available in: Colon-Perez, Luis M., et al. "Dimensionless, scale-invariant, edge weight metric for the study of complex structural networks." PLOS one 10.7 (2015). @@ -83,13 +85,14 @@ def _build_arg_parser(): help='Volume matrix used for edge-wise division.') vol = edge_p.add_mutually_exclusive_group() - # toDo. Same description for the two options vol.add_argument('--parcel_volume', nargs=2, metavar=('ATLAS', 'LABELS_LIST'), - help='Atlas and labels list for edge-wise division.') + help='Atlas and labels list for edge-wise division by \n' + 'the sum of node volum.') vol.add_argument('--parcel_surface', nargs=2, metavar=('ATLAS', 'LABELS_LIST'), - help='Atlas and labels list for edge-wise division.') + help='Atlas and labels list for edge-wise division by \n' + 'the sum of the node surface.') scaling_p = p.add_argument_group('Scaling options') scale = scaling_p.add_mutually_exclusive_group() diff --git a/scripts/scil_connectivity_pairwise_agreement.py b/scripts/scil_connectivity_pairwise_agreement.py index 41e62687d..79eeb2023 100755 --- a/scripts/scil_connectivity_pairwise_agreement.py +++ b/scripts/scil_connectivity_pairwise_agreement.py @@ -2,10 +2,17 @@ # -*- coding: utf-8 -*- """ -Evaluate pair-wise similarity measures of connectivity matrix. +Evaluate pair-wise similarity measures of connectivity matrices. The computed similarity measures are: -sum of square difference and pearson correlation coefficent +- RMSE: root-mean-square difference +- Pearson correlation coefficent +- w-dice: weighted dice, agreement of the values. +- dice: agreement of the binarized matrices + +If more than two matrices are given in input, the similarity measures will be +computed for each pair. Alternatively, you can compare all matrices to a +single reference, using --single_compare. 
Formerly: scil_evaluate_connectivity_pairwaise_agreement_measures.py
 """
 
@@ -23,6 +30,7 @@
                              assert_inputs_exist,
                              assert_outputs_exist,
                              load_matrix_in_any_format)
+from scilpy.stats.matrix_stats import pairwise_agreement
 from scilpy.tractanalysis.reproducibility_measures import compute_dice_voxel
 
 
@@ -53,48 +61,27 @@ def main():
     args = parser.parse_args()
     logging.getLogger().setLevel(logging.getLevelName(args.verbose))
 
-    assert_inputs_exist(parser, args.in_matrices)
+    assert_inputs_exist(parser, args.in_matrices, args.single_compare)
     assert_outputs_exist(parser, args, args.out_json)
 
+    # Check that --single_compare is not loaded twice
+    if args.single_compare and args.single_compare in args.in_matrices:
+        id = args.in_matrices.index(args.single_compare)
+        args.in_matrices.pop(id)
+
+    # Load matrices
     all_matrices = []
     for filename in args.in_matrices:
-        tmp_mat = load_matrix_in_any_format(filename)
-        tmp_mat = tmp_mat.astype(float)
-        tmp_mat -= np.min(tmp_mat)
-        if args.normalize:
-            all_matrices.append(tmp_mat / np.max(tmp_mat))
-        else:
-            all_matrices.append(tmp_mat)
-
-    if args.single_compare:
-        tmp_mat = load_matrix_in_any_format(args.single_compare)
-        tmp_mat = tmp_mat.astype(float)
-        tmp_mat -= np.min(tmp_mat)
-        if args.normalize:
-            all_matrices.append(tmp_mat / np.max(tmp_mat))
-        else:
-            all_matrices.append(tmp_mat)
-
-    output_measures_dict = {'RMSE': [], 'correlation': [],
-                            'w_dice_voxels': [], 'dice_voxels': []}
+        all_matrices.append(load_matrix_in_any_format(filename).astype(float))
 
+    ref_matrix = None
     if args.single_compare:
-        if args.single_compare in args.in_matrices:
-            id = args.in_matrices.index(args.single_compare)
-            all_matrices.pop(id)
-        pairs = list(itertools.product(all_matrices[:-1], [all_matrices[-1]]))
-    else:
-        pairs = list(itertools.combinations(all_matrices, r=2))
-
-    for i in pairs:
-        rmse = np.sqrt(np.mean((i[0]-i[1])**2))
-        output_measures_dict['RMSE'].append(rmse)
-        corrcoef = np.corrcoef(i[0].ravel(), i[1].ravel())
-        output_measures_dict['correlation'].append(corrcoef[0][1])
-        dice, w_dice = compute_dice_voxel(i[0], i[1])
-        output_measures_dict['dice_voxels'].append(dice)
-        output_measures_dict['w_dice_voxels'].append(w_dice)
+        ref_matrix = load_matrix_in_any_format(
+            args.single_compare).astype(float)
 
+    # Compute and save
+    output_measures_dict = pairwise_agreement(all_matrices, ref_matrix,
+                                              normalize=args.normalize)
     with open(args.out_json, 'w') as outfile:
         json.dump(output_measures_dict, outfile, indent=args.indent,
                   sort_keys=args.sort_keys)
diff --git a/scripts/scil_connectivity_reorder_rois.py b/scripts/scil_connectivity_reorder_rois.py
index 9ed78ab5d..9d1d78339 100755
--- a/scripts/scil_connectivity_reorder_rois.py
+++ b/scripts/scil_connectivity_reorder_rois.py
@@ -2,20 +2,23 @@
 # -*- coding: utf-8 -*-
 
 """
-Re-order one or many connectivity matrices using a text file format.
-The first row are the (x) and the second row the (y), must be space separated.
-The resulting matrix does not have to be square (support unequal number of
-x and y).
+Re-order one or many connectivity matrices' rows and columns. The connectivity
+matrices can come, for instance, from scil_connectivity_compute_matrices.py.
+
+To state the new order, use a text file with the following formatting:
+The first row is the new x order and the second row the new y order, using
+integer values separated by a space. The resulting matrix does not have to be
+square (supports unequal number of x and y). 
The values refer to the coordinates (starting at 0) in the matrix, but if the --labels_list parameter is used, the values will refer to the label which will be converted to the appropriate coordinates. This file must be the same as the one provided to the scil_tractogram_segment_connections_from_labels.py. -To subsequently use scil_visualize_connectivity.py with a lookup table, you +To subsequently use scil_viz_connectivity.py with a lookup table, you must use a label-based reording json and use --labels_list. -You can also use the Optimal Leaf Ordering(OLO) algorithm to transform a +You can also use the Optimal Leaf Ordering (OLO) algorithm to transform a sparse matrix into an ordering that reduces the matrix bandwidth. The output file can then be re-used with --in_ordering. Only one input can be used with this option, we recommand an average streamline count or volume matrix. @@ -61,9 +64,12 @@ def _build_arg_parser(): 'structures along the diagonal.') p.add_argument('--out_suffix', - help='Suffix for the output matrix filename.') + help="Suffix for the output matrices filenames. It will " + "be appended to each input matrix's name.") p.add_argument('--out_dir', - help='Output directory for the re-ordered matrices.') + help='Output directory for the re-ordered matrices.\n' + 'If not set, each output matrix will be saved in ' + 'the same \ndirectory as the input matrix.') p.add_argument('--labels_list', help='List saved by the decomposition script,\n' '--in_ordering must contain labels rather than ' @@ -77,10 +83,15 @@ def _build_arg_parser(): def parse_ordering(in_ordering_file, labels_list=None): """ - toDo. Docstring please. + Read the ordering file, which should contain two lines, with integer + values for each. """ + # Cannot load with numpy in case of non-squared matrix (unequal x/y) with open(in_ordering_file, 'r') as my_file: lines = my_file.readlines() + if len(lines) != 2: + raise ValueError("The ordering file should contain exactly two " + "lines of text.") ordering = [[int(val) for val in lines[0].split()], [int(val) for val in lines[1].split()]] if labels_list: @@ -103,8 +114,12 @@ def main(): assert_inputs_exist(parser, args.in_matrices, [args.labels_list, args.in_ordering]) assert_output_dirs_exist_and_empty(parser, args, [], args.out_dir) + # Verification of output matrices names will be done below. 
if args.optimal_leaf_ordering is not None: + if args.out_suffix or args.out_dir: + logging.warning("Options --out_suffix and --out_dir are ignored " + "with option --optimal_leaf_ordering.") if len(args.in_matrices) > 1: parser.error('Only one input is supported with RCM.') assert_outputs_exist(parser, args, args.optimal_leaf_ordering) @@ -125,29 +140,17 @@ def main(): basename, ext = os.path.splitext(filename) basename = os.path.basename(basename) - curr_filename = os.path.join(out_dir, - '{}{}.{}'.format(basename, - args.out_suffix, - ext[1:])) + curr_filename = os.path.join( + out_dir, '{}{}.{}'.format(basename, args.out_suffix, ext[1:])) out_filenames.append(curr_filename) assert_outputs_exist(parser, args, out_filenames) - # Cannot load with numpy in case of non-squared matrix (unequal x/y) ordering = parse_ordering(args.in_ordering, args.labels_list) - for filename in args.in_matrices: - out_dir = os.path.dirname(filename) if args.out_dir is None \ - else args.out_dir - basename, ext = os.path.splitext(filename) - basename = os.path.basename(basename) - matrix = load_matrix_in_any_format(filename) + for in_name, out_name in zip(args.in_matrices, out_filenames): + matrix = load_matrix_in_any_format(in_name) reordered_matrix = apply_reordering(matrix, ordering) - curr_filename = os.path.join(out_dir, - '{}{}.{}'.format(basename, - args.out_suffix, - ext[1:])) - - save_matrix_in_any_format(curr_filename, reordered_matrix) + save_matrix_in_any_format(out_name, reordered_matrix) if __name__ == "__main__":
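
Reviewer note (not part of the patch): a minimal sketch of how the new pairwise_agreement helper could be exercised outside the CLI. It assumes only the signature and returned dictionary keys shown in the scilpy/stats/matrix_stats.py hunk above; the random 4x4 matrices are purely illustrative.

    import numpy as np

    from scilpy.stats.matrix_stats import pairwise_agreement

    # Three small fake "connectivity matrices" (random data, illustration only).
    rng = np.random.default_rng(0)
    matrices = [rng.random((4, 4)) for _ in range(3)]

    # All-pairs mode: 3 matrices -> 3 pairs, one value per pair for each key.
    scores = pairwise_agreement(matrices, normalize=True)
    print(sorted(scores.keys()))
    # ['RMSE', 'correlation', 'dice_voxels', 'w_dice_voxels']

    # Single-reference mode, mirroring what the script does with --single_compare.
    scores_vs_ref = pairwise_agreement(matrices, ref_matrix=matrices[0],
                                       normalize=True)

Moving this computation out of the script and into scilpy.stats.matrix_stats is also what makes it reachable from the unit-test stubs added in this PR.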
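
Likewise, the ordering file consumed by scil_connectivity_reorder_rois.py (two whitespace-separated rows: new x order, then new y order) maps directly onto apply_reordering. A tiny sketch, assuming only the behaviour exercised by the new test_matrix_tools.py test; the index lists are illustrative:

    import numpy as np

    from scilpy.connectivity.matrix_tools import apply_reordering

    mat = np.arange(1, 17).reshape(4, 4)
    # Same two rows you would write in an --in_ordering text file:
    # row order 0 1 3 2, column order 1 2 3 0.
    reordered = apply_reordering(mat, [[0, 1, 3, 2], [1, 2, 3, 0]])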