-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
183 lines (164 loc) · 7.63 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from cProfile import label
import csv
import os
import numpy as np
import matplotlib.pyplot as plt
from numpy import random
import seaborn as sn
import pandas as pd
from sklearn import manifold
from configuration import config
from pathlib import Path
def check_existing_paths(dir_paths=None, file_paths=None):
    """Ensure required directories exist (creating them when missing) and that
    required files are present, aborting the program otherwise.

    Parameters
    ----------
    dir_paths : list[str] | None
        Directory paths, relative to the current working directory, to create
        if they do not exist (intermediate folders are created too).
    file_paths : list[str] | None
        File paths that must already exist; a missing file prints a
        configuration error and terminates the process with exit code 1.
    """
    # Avoid mutable default arguments (a single shared list across calls).
    dir_paths = [] if dir_paths is None else dir_paths
    file_paths = [] if file_paths is None else file_paths
    project_path = os.getcwd() + "/"
    for dir_path in dir_paths:
        if not os.path.isdir(dir_path):
            # makedirs (vs mkdir) also creates missing intermediate folders;
            # exist_ok guards against a race with a concurrent creator.
            os.makedirs(project_path + dir_path, exist_ok=True)
    for file_path in file_paths:
        if not os.path.isfile(file_path):
            print("Configuration error: file '" + file_path + "' not exist in project")
            # SystemExit is exactly what the builtin exit() raises, but works
            # even when the `site` module is not loaded.
            raise SystemExit(1)
def get_subjects_seq_idx(seq_df_path):
    """Map each subject to the list of global sequence indices belonging to it.

    Parameters
    ----------
    seq_df_path : str
        Path of a CSV whose first column holds sequence names; only used when
        the configured database is not BioVid.

    Returns
    -------
    dict[int, list[int]]
        subject index (0-based, in discovery order) -> list of the global
        sequence indices owned by that subject.
    """
    subjects_idxs = {}
    subject_count = 0
    sequences_count = 0
    if config.type_of_database == 'BioVid':
        path = 'BioVid_dataset/'  # Path of the landmark folder containing the dataset
        # NOTE(review): os.listdir order is OS-dependent — subject/sequence
        # numbering is only reproducible on the same filesystem; confirm if
        # cross-machine reproducibility is required.
        subjects = [name for name in os.listdir(path)]  # Retrieve all the subjects in the dataset
        for i, sub in enumerate(subjects):
            if sub == '102309_m_61':  # Subject to ignore
                continue
            path_sequences = path + sub + '/'
            sequences = [name for name in os.listdir(path_sequences)]  # Retrieve all the sequences of the current subject
            for seq in sequences:
                if subject_count not in subjects_idxs:
                    subjects_idxs[subject_count] = []
                subjects_idxs[subject_count].append(sequences_count)
                sequences_count += 1
            subject_count +=1
    else:
        # CSV-based dataset: sequence names are assumed to share their first
        # 6 characters within a subject, so a change in that prefix between
        # consecutive rows marks the start of a new subject.
        seq_df = pd.read_csv(seq_df_path)
        subjects_idxs = {}
        subject_count = 0
        seq_name = ""
        for seq_num in np.arange(seq_df.shape[0]):
            if seq_num != 0 and seq_name[0:6] != seq_df.iloc[seq_num][0][0:6]:
                subject_count += 1
            if subject_count not in subjects_idxs:
                subjects_idxs[subject_count] = []
            subjects_idxs[subject_count].append(seq_num)
            seq_name = seq_df.iloc[seq_num][0]
        #print("subject_inx", subjects_idxs)
    return subjects_idxs
def get_training_and_test_idx(num_videos, cross_val_protocol, seq_df_path):
    """Generate training/test index splits for the chosen cross-validation protocol.

    Parameters
    ----------
    num_videos : int
        Total number of sequences in the dataset.
    cross_val_protocol : str
        One of "Leave-One-Subject-Out", "5-fold-cross-validation" or
        "Leave-One-Sequence-Out". Any other value yields empty lists.
    seq_df_path : str
        Path of the sequences CSV, forwarded to get_subjects_seq_idx.

    Returns
    -------
    tuple[list, list]
        (all_training_idx, all_test_idx): one pair of (shuffled) index arrays
        per cross-validation round.
    """
    subject_idxs = get_subjects_seq_idx(seq_df_path)
    # Derive the subject count from the data instead of hard-coding 25, so
    # the splits stay correct for datasets with a different number of
    # subjects (e.g. BioVid vs the 25-subject CSV dataset).
    num_subject = len(subject_idxs)
    all_training_idx = []
    all_test_idx = []
    if cross_val_protocol == "Leave-One-Subject-Out":
        # One round per subject: that subject's sequences form the test set.
        for subject_test in np.arange(num_subject):
            idxs_test = subject_idxs[subject_test]
            get_test_idx = np.array(idxs_test)
            random.shuffle(get_test_idx)
            all_test_idx.append(get_test_idx)
            get_training_idx = np.delete(np.arange(0, num_videos), idxs_test)
            random.shuffle(get_training_idx)
            all_training_idx.append(get_training_idx)
    elif cross_val_protocol == "5-fold-cross-validation":
        subject_iterator = num_subject / 5  # subjects per fold (may be fractional)
        for subjects_test_offset in np.arange(0, num_subject, subject_iterator):
            idxs_test = []
            subjects_offset = subjects_test_offset + subject_iterator
            # Clamp the last fold so it never indexes past the final subject.
            if subjects_offset >= len(subject_idxs):
                subjects_offset = len(subject_idxs)
            for subject_test in np.arange(subjects_test_offset, subjects_offset):
                idxs_test.append(subject_idxs[subject_test])
            idxs_test = sum(idxs_test, [])  # flatten the per-subject lists
            get_test_idx = np.array(idxs_test)
            random.shuffle(get_test_idx)
            all_test_idx.append(get_test_idx)
            get_training_idx = np.delete(np.arange(0, num_videos), idxs_test)
            random.shuffle(get_training_idx)
            all_training_idx.append(get_training_idx)
    elif cross_val_protocol == "Leave-One-Sequence-Out":
        # One round per sequence: a single video is held out each time.
        for video_idx in np.arange(0, num_videos):
            all_test_idx.append(np.asarray([video_idx]))
            all_training_idx.append(np.delete(np.arange(0, num_videos), video_idx))
    return all_training_idx, all_test_idx
def plot_matrix(cm, labels, fname, normalize=True):
    """Render a confusion matrix as an annotated heatmap and save it to fname.

    Parameters
    ----------
    cm : np.ndarray
        Square confusion matrix (one row/column per label).
    labels : sequence
        Class labels used as tick labels on both axes.
    fname : str
        Output image path (saved at 240 dpi).
    normalize : bool
        If True, scale each row to sum to 1; rows summing to 0 are left as-is.
    """
    # Normalize confusion matrix
    if normalize:
        # Work on a float copy: assigning ratios back into an integer array
        # silently truncates them to 0/1, and normalizing in place would
        # also mutate the caller's matrix.
        cm = np.asarray(cm).astype(float)
        for row_idx in np.arange(cm.shape[0]):
            sum_row = sum(cm[row_idx])
            if sum_row > 0:
                cm[row_idx] = cm[row_idx] / sum_row
    df_cm = pd.DataFrame(cm, index=[str(i) for i in labels],
                         columns=[str(i) for i in labels])
    plt.figure(figsize=(10, 7))
    sn.heatmap(df_cm, annot=True, cmap="Blues")
    plt.savefig(fname, dpi=240)
    plt.close()
def save_data_on_csv(data, out_df, file_path):
    """Append `data` as one row to `out_df`, write the frame to CSV, and
    return the updated frame.

    Parameters
    ----------
    data : array-like
        Values for one row; must have as many entries as `out_df` has columns.
    out_df : pd.DataFrame
        Accumulator frame (not mutated; a new frame is returned).
    file_path : str
        Destination CSV path (overwritten each call, header included).

    Returns
    -------
    pd.DataFrame
        `out_df` with the new row appended.
    """
    data_df_scores = np.array(data).reshape(-1)
    new_row = pd.Series(data_df_scores, index=out_df.columns)
    # DataFrame.append was removed in pandas 2.0; concat is the supported way.
    out_df = pd.concat([out_df, new_row.to_frame().T], ignore_index=True)
    out_df.to_csv(file_path, index=False, header=True)
    return out_df
def save_GMM_mean_info(gmm_means, selected_lndks_idx, csv_path, png_path):
    """Save GMM kernel means to CSV and plot them remapped to 2D with MDS.

    Parameters
    ----------
    gmm_means : sequence
        One row of per-landmark mean values for each GMM kernel.
    selected_lndks_idx : sequence[int]
        Indices of the selected landmarks (used only for the CSV column names).
    csv_path : str
        Destination CSV file for the means table.
    png_path : str
        Destination PNG file for the 2D MDS scatter plot.
    """
    model = manifold.MDS(n_components=2, metric=True, n_init=4, random_state=1, max_iter=200, dissimilarity='euclidean')
    columns = ["#kernel"] + ["ldk #" + str(ldks_idx) for ldks_idx in selected_lndks_idx]
    # Build all rows first and create the frame once: DataFrame.append was
    # removed in pandas 2.0, and the original's `columns=[columns]` wrapped
    # the column list in another list, producing a spurious column level.
    rows = [[kernel_idx] + [center for center in gmm_means[kernel_idx]]
            for kernel_idx in np.arange(len(gmm_means))]
    out_gmm_means = pd.DataFrame(rows, columns=columns)
    out_gmm_means.to_csv(csv_path, index=False, header=True)
    data_transformed = model.fit_transform(gmm_means)
    plt.plot(data_transformed[:, 0], data_transformed[:, 1], '.b')
    for k in np.arange(data_transformed.shape[0]):
        plt.annotate(str(k), (data_transformed[k, 0], data_transformed[k, 1]))
    # 'MDS' (multidimensional scaling) — the original title had a typo ('MSD').
    plt.title('Position of %d clusters remapped in 2D with MDS' % (data_transformed.shape[0]))
    plt.savefig(png_path)
    plt.close()
def read_dict_from_csv(file_path, out_df, dict_labels):
    """Read a results CSV into a nested dict keyed by the first column.

    The header row is skipped. Each data row must have at least
    len(dict_labels) + 1 columns: column 0 is the (float) key, and the
    following columns are mapped onto `dict_labels` in order.

    Parameters
    ----------
    file_path : str
        CSV path; if it does not exist an empty dict is returned.
    out_df : pd.DataFrame
        Frame with 3 columns used to mirror the rows (see NOTE below).
    dict_labels : sequence[str]
        Labels for the value columns of each row.

    Returns
    -------
    dict[float, dict[str, float]]
        row key -> {label: value}.
    """
    result = {}  # renamed from `dict` to avoid shadowing the builtin
    if os.path.isfile(file_path):
        with open(file_path, 'r') as thresholds_rslt_file:
            reader = csv.reader(thresholds_rslt_file)
            for row_idx, row in enumerate(reader):
                if row_idx > 0:  # skip the header line
                    key = float(row[0])
                    result[key] = {}
                    # Use a distinct loop variable: the original reused `idx`
                    # for both the row and the label loops.
                    for col_idx, label_name in enumerate(dict_labels):
                        result[key][label_name] = float(row[col_idx + 1])
                    # NOTE(review): out_df is rebuilt here but never returned
                    # or written; kept (with pandas-2.0-safe concat) so the
                    # original's validation side effects are preserved.
                    data = np.array([row[0], row[1], row[2]]).reshape(-1)
                    out_df = pd.concat([out_df, pd.Series(data, index=out_df.columns).to_frame().T],
                                       ignore_index=True)
    return result
def plot_graph(x, y, x_label, y_label, title, file_path, color = 'blue'):
    """Draw the single curve (x, y) in the given color, label the axes and
    title, then save the figure to `file_path` and close it."""
    axes = plt.gca()
    axes.plot(x, y, color=color)
    axes.set_ylabel(y_label)
    axes.set_xlabel(x_label)
    axes.set_title(title)
    plt.savefig(file_path)
    plt.close()
def plot_all_graphs(x, y, x_label, y_label, name_labels, title, file_path):
    """Plot every series in `y` against the shared `x` axis — one legend
    entry per name in `name_labels` — then save the figure to `file_path`."""
    plt.close()
    for series, series_name in zip(y, name_labels):
        plt.plot(x, series, label=series_name)
    axes = plt.gca()
    axes.set_ylabel(y_label)
    axes.set_xlabel(x_label)
    axes.set_title(title)
    plt.legend()
    plt.savefig(file_path)
    plt.close()
def plot_error_graph(mean_error, errors, n_test, threshold_idx, path_errors):
    """Save a bar chart of the per-round mean absolute errors, with a red
    horizontal line marking their overall mean, then report the output path."""
    plt.close()
    round_numbers = np.arange(1, n_test + 1)
    plt.bar(round_numbers, errors, color="blue")
    plt.axhline(y=mean_error, xmin=0, xmax=n_test + 1, color="red",
                label='Mean Absolute Error: ' + str(mean_error))
    axes = plt.gca()
    axes.set_ylabel("Average of the Mean Absolute Error")
    axes.set_xlabel("Num round")
    axes.set_title("Mean Absolute Errors")
    plt.legend()
    output_file = path_errors + str(threshold_idx) + " mean_errors_graph.png"
    plt.savefig(output_file)
    plt.close()
    print("Histogram of the mean absolute error detected saved in a png file on path '" + path_errors + "'")