Skip to content

Commit

Permalink
Merge pull request #15 from OnnoKampman/refactor
Browse files Browse the repository at this point in the history
Further refactoring
  • Loading branch information
OnnoKampman authored Jun 1, 2024
2 parents bde162b + 6638d77 commit 4d5ef5a
Show file tree
Hide file tree
Showing 499 changed files with 4,885 additions and 2,038 deletions.
7 changes: 4 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
.ipynb_checkpoints/
__pycache__

# Data.
# Data
opk20_hivemind_paper_1/

**.nii.gz
**.tar.gz

datasets/fmri/rs/HCP/hcp-openaccess/
# HCP_PTN1200_recon2/
datasets/fmri/rs/HCP/HCP_PTN1200_recon2/

# Results.
# Results

results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d15_ts2/brain_states/k01/
results/fmri/rs/HCP/HCP_PTN1200_recon2/3T_HCP1200_MSMAll_d15_ts2/brain_states/k02/
Expand Down
52 changes: 52 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# https://citation-file-format.github.io

cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Kampman"
given-names: "Onno P."
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "Ziminski"
given-names: "Joe"
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "Afyouni"
given-names: "Soroosh"
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "van der Wilk"
given-names: "Mark"
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "Kourtzi"
given-names: "Zoe"
orcid: "https://orcid.org/0000-0000-0000-0000"
title: "FCEst-benchmarking"
version: 0.0.1
doi: 10.5281/zenodo.1234
date-released: 2024-05-17
url: "https://github.com/OnnoKampman/FCEst-benchmarking"
preferred-citation:
type: article
authors:
- family-names: "Kampman"
given-names: "Onno P."
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "Ziminski"
given-names: "Joe"
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "Afyouni"
given-names: "Soroosh"
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "van der Wilk"
given-names: "Mark"
orcid: "https://orcid.org/0000-0000-0000-0000"
- family-names: "Kourtzi"
given-names: "Zoe"
orcid: "https://orcid.org/0000-0000-0000-0000"
doi: "10.1162/imag_a_00184"
journal: "Imaging Neuroscience"
month: 5
start: 1 # First page number
end: 28 # Last page number
title: "Time-varying functional connectivity as Wishart processes"
issue: 1
volume: 2
year: 2024
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Functional connectivity (FC) estimates are sensitive to choice of estimation met
This project aims to design a range of benchmarks to determine what estimation method to use.

These benchmarks have been particularly developed to test the estimation methods included in the [FCEst](https://github.com/OnnoKampman/FCEst) Python package.
Results have been published in an article in Imaging Neuroscience (see `CITATION.cff`).

Many extensions of this project are possible, both in terms of adding more estimation methods and more benchmarks.

## Contributing

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
subset_dimensionality=data_dimensionality,
hostname=socket.gethostname()
)
n_subjects = cfg['n-subjects']
n_time_series = int(data_dimensionality[1:])
num_subjects = cfg['n-subjects']
num_time_series = int(data_dimensionality[1:])
all_subjects_list = get_human_connectome_project_subjects(
data_dir=cfg['data-dir'], first_n_subjects=n_subjects
data_dir=cfg['data-dir'], first_n_subjects=num_subjects
)

for tvfc_summary_measure in cfg['TVFC-summary-measures']:
Expand All @@ -55,15 +55,15 @@
logging.info(f'> SUMMARY MEASURE: {tvfc_summary_measure:s}')
logging.info(f'> MODEL NAME: {model_name:s}')
logging.info(f'> SCAN ID: {scan_id:d}')
logging.info(f'> SUBJECT {i_subject+1: 3d} / {n_subjects:d}: {subject_filename:s}')
logging.info(f'> SUBJECT {i_subject+1: 3d} / {num_subjects:d}: {subject_filename:s}')

# Load TVFC estimates - some may be missing.
tvfc_estimates_filepath = os.path.join(
tvfc_estimates_savedir, f"{subject_filename.removesuffix('.txt'):s}.csv"
)
if not os.path.exists(tvfc_estimates_filepath):
logging.warning(f"Could not find TVFC estimates '{tvfc_estimates_filepath:s}'.")
tvfc_estimates_array = np.empty((int(n_time_series * (n_time_series - 1) / 2), ))
tvfc_estimates_array = np.empty((int(num_time_series * (num_time_series - 1) / 2), ))
tvfc_estimates_array[:] = np.nan
else:
tvfc_estimates_df = pd.read_csv(tvfc_estimates_filepath, index_col=0) # (D*D, N)
Expand Down Expand Up @@ -98,7 +98,7 @@
reconstruct_symmetric_summary_measure_matrix_from_tril(
mean_over_subjects_edgewise_summarized_tvfc_df.values,
tvfc_summary_measure=tvfc_summary_measure,
num_time_series=n_time_series
num_time_series=num_time_series
)
) # (D, D)

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/fmri/rs/HCP/brain_states/plot_brain_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def plot_brain_state_cluster_centroids(
),
index_col=0
) # (D, D)
n_time_series = cluster_centroid_df.shape[0] # D
num_time_series = cluster_centroid_df.shape[0] # D

brain_state += 1 # make 1-indexed
i_subplot = scan_id * n_basis_states + brain_state
Expand All @@ -69,7 +69,7 @@ def plot_brain_state_cluster_centroids(
if data_dimensionality == 'd15':
cluster_centroid_array, new_rsn_names = reorder_ica_components(
original_matrix=cluster_centroid_df.values,
n_time_series=n_time_series,
num_time_series=num_time_series,
config_dict=config_dict,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@


def _save_number_of_brain_state_switches(
config_dict: dict, n_basis_states: int, connectivity_metric: str = 'correlation'
config_dict: dict,
num_basis_states: int,
connectivity_metric: str = 'correlation',
) -> None:
for model_name in config_dict['models-brain-state-analysis']:
model_number_of_switches_df = pd.DataFrame()
for scan_id in config_dict['scan-ids']:
brain_states_savedir = os.path.join(
config_dict['git-results-basedir'], 'brain_states', f'k{n_basis_states:02d}', f'scan_{scan_id:d}'
config_dict['git-results-basedir'], 'brain_states', f'k{num_basis_states:02d}', f'scan_{scan_id:d}'
)

# Load brain state assignment file.
Expand All @@ -32,7 +34,7 @@ def _save_number_of_brain_state_switches(

# Save number of brain state switches for this model.
n_brain_state_switches_savedir = os.path.join(
config_dict['git-results-basedir'], 'brain_states', f'k{n_basis_states:02d}'
config_dict['git-results-basedir'], 'brain_states', f'k{num_basis_states:02d}'
)
n_brain_state_switches_filename = f'number_of_brain_state_switches_{model_name:s}.csv'
model_number_of_switches_df.to_csv(
Expand All @@ -54,5 +56,5 @@ def _save_number_of_brain_state_switches(
for n_brain_states in n_brain_states_list:
_save_number_of_brain_state_switches(
config_dict=cfg,
n_basis_states=n_brain_states
num_basis_states=n_brain_states
)
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@
if __name__ == "__main__":

data_split = 'LEOO' # leave-every-other-out
experiment_dimensionality = 'multivariate' # TODO: take all edges from the jointly/multivariately trained model

data_dimensionality = sys.argv[1] # 'd15', 'd50'
model_name = sys.argv[2] # 'SVWP_joint', 'DCC_joint', 'GO_joint', 'SW_cross_validated', 'SW_30', 'SW_60', 'sFC'

num_time_series = int(data_dimensionality[1:])
experiment_dimensionality = 'multivariate' # TODO: take all edges from the jointly/multivariately trained model
cfg = get_config_dict(
data_set_name='HCP_PTN1200_recon2',
subset_dimensionality=data_dimensionality,
hostname=socket.gethostname()
)
num_subjects = cfg['n-subjects']
num_time_series = int(data_dimensionality[1:])
all_subjects_list = get_human_connectome_project_subjects(
data_dir=cfg['data-dir'], first_n_subjects=num_subjects
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def plot_edgewise_imputation_benchmark_scores_joint(
edgewise_likelihoods, new_rsn_names = reorder_ica_components(
config_dict=config_dict,
original_matrix=edgewise_likelihoods.values,
n_time_series=num_time_series,
num_time_series=num_time_series,
# lower_triangular=True
)
else:
Expand Down Expand Up @@ -131,7 +131,7 @@ def plot_edgewise_imputation_benchmark_scores(
edgewise_likelihoods, new_rsn_names = reorder_ica_components(
config_dict=config_dict,
original_matrix=edgewise_likelihoods.values,
n_time_series=num_time_series,
num_time_series=num_time_series,
# lower_triangular=True
)
else:
Expand Down Expand Up @@ -184,13 +184,13 @@ def _plot_performance_difference(config_dict: dict, edgewise_likelihoods: (pd.Da
"""
edgewise_likelihoods_first_method, new_rsn_names = reorder_ica_components(
original_matrix=edgewise_likelihoods[0].values,
n_time_series=edgewise_likelihoods[0].shape[0],
num_time_series=edgewise_likelihoods[0].shape[0],
config_dict=config_dict,
# lower_triangular=True
)
edgewise_likelihoods_second_method, new_rsn_names = reorder_ica_components(
original_matrix=edgewise_likelihoods[1].values,
n_time_series=edgewise_likelihoods[1].shape[0],
num_time_series=edgewise_likelihoods[1].shape[0],
config_dict=config_dict,
# lower_triangular=True
)
Expand Down
11 changes: 5 additions & 6 deletions benchmarks/fmri/rs/HCP/plotters/plot_TVFC_estimates.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def plot_hcp_tvfc_estimates(
figsize: tuple[float]
figures_savedir: str
"""
n_time_series = y_train_locations.shape[1]
num_time_series = y_train_locations.shape[1]

sns.set(style="whitegrid")
plt.style.use(os.path.join(config_dict['git-basedir'], 'configs', 'fig.mplstyle'))
Expand All @@ -69,9 +69,9 @@ def plot_hcp_tvfc_estimates(
)

if random_edges:
interaction_pairs_indices = np.triu_indices(n_time_series, k=1) # set k=0 to include variances
interaction_pairs_indices = np.triu_indices(num_time_series, k=1) # set k=0 to include variances
interaction_pairs_indices = np.array(interaction_pairs_indices).T
n_interactions = int(n_time_series * (n_time_series - 1) / 2)
n_interactions = int(num_time_series * (num_time_series - 1) / 2)
assert n_interactions == len(interaction_pairs_indices)
random_interactions = np.random.choice(len(interaction_pairs_indices), num_rows*num_columns)
interaction_pairs_indices = interaction_pairs_indices[random_interactions]
Expand Down Expand Up @@ -208,7 +208,6 @@ def plot_hcp_tvfc_estimates(
x, y = load_human_connectome_project_data(
data_file, scan_id=scan_id, verbose=False
) # (N, 1), (N, D)
n_time_steps = x.shape[0]

if data_split == 'LEOO':
x_train, _ = leave_every_other_out_split(x)
Expand All @@ -217,11 +216,11 @@ def plot_hcp_tvfc_estimates(
x_train = x
y_train = y

n_time_steps = x_train.shape[0]
num_time_steps = x_train.shape[0]
xx = convert_to_minutes(
x_train,
repetition_time=cfg['repetition-time'],
data_length=n_time_steps
data_length=num_time_steps
)
plot_hcp_tvfc_estimates(
config_dict=cfg,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,17 @@ def plot_tvfc_summary_measures_mean_over_subjects_all_edges_joint(
) # (D, D)

# Re-order ICA components.
n_time_series = summarized_tvfc_df.shape[0]
num_time_series = summarized_tvfc_df.shape[0]
if data_dimensionality == 'd15':
summarized_tvfc_array, new_rsn_names = reorder_ica_components(
config_dict=config_dict,
original_matrix=summarized_tvfc_df.values,
n_time_series=n_time_series,
num_time_series=num_time_series,
)
else:
# TODO: add RSN names map for d50
summarized_tvfc_array = summarized_tvfc_df.values
new_rsn_names = np.arange(n_time_series)
new_rsn_names = np.arange(num_time_series)

# Define mask for diagonal and upper triangular values.
mask = np.zeros_like(summarized_tvfc_array)
Expand Down Expand Up @@ -160,17 +160,17 @@ def plot_tvfc_summary_measures_mean_over_subjects_all_edges(
vmin, vmax = _set_colorbar_min_max(summary_measure)

# Re-order ICA components.
n_time_series = summarized_tvfc_df.shape[0]
num_time_series = summarized_tvfc_df.shape[0]
if data_dimensionality == 'd15':
summarized_tvfc_array, new_rsn_names = reorder_ica_components(
config_dict=config_dict,
original_matrix=summarized_tvfc_df.values,
n_time_series=n_time_series,
num_time_series=num_time_series,
)
else:
# TODO: add RSN names map for d50
summarized_tvfc_array = summarized_tvfc_df.values
new_rsn_names = np.arange(n_time_series)
new_rsn_names = np.arange(num_time_series)

# Define mask for diagonal and upper triangular values.
mask = np.zeros_like(summarized_tvfc_array)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
subset_dimensionality=data_dimensionality,
hostname=socket.gethostname()
)
n_time_series = int(data_dimensionality[1:])
kernel_params_savedir = os.path.join(cfg['git-results-basedir'], 'kernel_analysis')
kernel_params_df = pd.read_csv(
os.path.join(kernel_params_savedir, f'{kernel_param:s}_kernel_params.csv'),
Expand Down
10 changes: 5 additions & 5 deletions benchmarks/fmri/rs/HCP/plotters/plot_node_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ def _plot_time_series(
"""
figure_name_time_series = 'time_series.pdf'

n_time_series = y_locations.shape[1]
num_time_series = y_locations.shape[1]

plt.figure(figsize=config_dict['plot-time-series-figsize'])
for i_time_series in range(n_time_series):
plt.subplot(n_time_series, 1, i_time_series+1)
for i_time_series in range(num_time_series):
plt.subplot(num_time_series, 1, i_time_series+1)
plt.plot(
x_plot, y_locations[:, i_time_series], 'x-',
markersize=0, label=f'TS_{(i_time_series+1):d}'
Expand Down Expand Up @@ -92,11 +92,11 @@ def _plot_time_series(
x_train = x
y_train = y

n_time_steps = x_train.shape[0]
num_time_steps = x_train.shape[0]
xx = convert_to_minutes(
x_train,
repetition_time=cfg['repetition-time'],
data_length=n_time_steps
data_length=num_time_steps
)
_plot_time_series(
config_dict=cfg,
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/fmri/rs/HCP/plotters/plot_sFC_estimates.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def _plot_static_functional_connectivity_estimate(
re_ordered_correlation_matrix, new_rsn_names = reorder_ica_components(
config_dict=config_dict,
original_matrix=correlation_matrix,
n_time_series=num_time_series,
num_time_series=num_time_series,
)

plt.figure()
Expand Down Expand Up @@ -84,13 +84,13 @@ def _plot_static_functional_connectivity_estimate(
subset_dimensionality=data_dimensionality,
hostname=socket.gethostname()
)
n_time_series = int(data_dimensionality[1:])
num_time_series = int(data_dimensionality[1:])
scan_ids = cfg['scan-ids']
subjects = get_human_connectome_project_subjects(
data_dir=cfg['data-dir'], as_ints=True
)

mean_sfc_estimate = np.zeros((n_time_series, n_time_series))
mean_sfc_estimate = np.zeros((num_time_series, num_time_series))
for i_subject, subject in enumerate(subjects):

print(f"\n> SUBJECT {i_subject+1:d}: {subject:d}\n")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
subjects_subset_list=all_subjects_list,
nuisance_variables=cfg['subject-measures-nuisance-variables'].copy(), # do not edit original list
morphometricity_subject_measure=subject_measure
) # (n_subjects, n_covariates)
) # (num_subjects, num_covariates)
K = get_tvfc_estimates_similarity_matrix(
config_dict=cfg,
tvfc_summary_measure=tvfc_summary_measure,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
subjects_subset_list=all_subjects_list,
nuisance_variables=cfg['subject-measures-nuisance-variables'].copy(), # do not edit original list
morphometricity_subject_measure=subject_measure
) # (n_subjects, n_covariates)
) # (num_subjects, num_covariates)
K = get_brain_state_switch_count_similarity_matrix(
config_dict=cfg,
n_basis_states=3,
Expand Down
Loading

0 comments on commit 4d5ef5a

Please sign in to comment.