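"""Project entry point (MS1 & MS2).

Loads one of three datasets (h36m, music, movies), optionally applies PCA,
then trains and evaluates either a deep network or a classical method
(kNN, logistic regression, ridge regression, or the dummy baselines).
"""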
import numpy as np
import argparse

# MS2 imports (PyTorch and the deep network)
import torch
from torch.utils.data import DataLoader
from methods.deep_network import SimpleNetwork, Trainer

from data import H36M_Dataset, FMA_Dataset, Movie_Dataset
from methods.pca import PCA
from methods.cross_validation import cross_validation
from metrics import accuracy_fn, mse_fn, macrof1_fn
from methods.knn import KNN
from methods.dummy_methods import DummyClassifier, DummyRegressor
from methods.logistic_regression import LogisticRegression
from methods.linear_regression import LinearRegression

def main(args):
    # First we create all of our dataset objects. A dataset object stores the
    # data, the labels (for classification), and the targets (for regression).
    if args.dataset == "h36m":
        train_dataset = H36M_Dataset(split="train", path_to_data=args.path_to_data)
        test_dataset = H36M_Dataset(split="test", path_to_data=args.path_to_data, means=train_dataset.means, stds=train_dataset.stds)
        # Validation split (used in MS2)
        val_dataset = H36M_Dataset(split="val", path_to_data=args.path_to_data, means=train_dataset.means, stds=train_dataset.stds)
    elif args.dataset == "music":
        train_dataset = FMA_Dataset(split="train", path_to_data=args.path_to_data)
        test_dataset = FMA_Dataset(split="test", path_to_data=args.path_to_data, means=train_dataset.means, stds=train_dataset.stds)
        # Validation split (used in MS2)
        val_dataset = FMA_Dataset(split="val", path_to_data=args.path_to_data, means=train_dataset.means, stds=train_dataset.stds)
    elif args.dataset == "movies":
        train_dataset = Movie_Dataset(split="train", path_to_data=args.path_to_data)
        test_dataset = Movie_Dataset(split="test", path_to_data=args.path_to_data, means=train_dataset.means, stds=train_dataset.stds)
        # Validation split (used in MS2)
        val_dataset = Movie_Dataset(split="val", path_to_data=args.path_to_data, means=train_dataset.means, stds=train_dataset.stds)
    else:
        raise ValueError(f"Unknown dataset: {args.dataset}")

    # Note: the raw arrays below are only used by the classical methods, not the NN.
    train_data, train_regression_target, train_labels = train_dataset.data, train_dataset.regression_target, train_dataset.labels
    test_data, test_regression_target, test_labels = test_dataset.data, test_dataset.regression_target, test_dataset.labels
    print("Dataloading is complete!")

    # Dimensionality reduction (MS2)
    if args.use_pca:
        print("Using PCA")
        pca_obj = PCA(d=200)
        exvar = pca_obj.find_principal_components(train_data)
        print(f"Explained variance in PCA --> {exvar}")
        train_data = pca_obj.reduce_dimension(train_data)
        train_regression_target = pca_obj.reduce_dimension(train_regression_target)
        test_data = pca_obj.reduce_dimension(test_data)
        test_regression_target = pca_obj.reduce_dimension(test_regression_target)

    # Neural network (only relevant for MS2).
    if args.method_name == "nn":
        # PyTorch dataloaders
        print("Using deep network...")
        train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
        test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
        # Create the model
        model = SimpleNetwork(input_size=train_dataset.feature_dim, num_classes=train_dataset.num_classes)
        # Training loop
        trainer = Trainer(model, lr=args.lr, epochs=args.max_iters)
        trainer.train_all(train_dataloader, val_dataloader)
        print("Final evaluation metrics ==> ")
        results_class = trainer.eval(test_dataloader)
        print("\n")
        np.save("results_class", results_class.numpy())

    # Classical ML methods (MS1 and MS2).
    # We first create the classification/regression object; search_arg_vals and
    # search_arg_name define the hyperparameter grid for cross validation.
    else:
        if args.method_name == "dummy_classifier":
            method_obj = DummyClassifier()
            search_arg_vals = [1, 2, 3]
            search_arg_name = "dummy_arg"
        elif args.method_name == "dummy_regressor":
            method_obj = DummyRegressor()
            search_arg_vals = [1, 2, 3]
            train_labels = train_regression_target
            search_arg_name = "dummy_arg"
        elif args.method_name == "logistic_regression":
            method_obj = LogisticRegression(lr=args.lr, max_iters=args.max_iters)
            search_arg_vals = [1e-3, 1e-4, 5e-5, 1e-5, 1e-6]
            search_arg_name = "lr"
        elif args.method_name == "ridge_regression":
            method_obj = LinearRegression(lmda=args.ridge_regression_lmda)
            train_labels = train_regression_target
            search_arg_vals = [0, 0.1, 1, 10, 100, 150, 500, 1000]
            search_arg_name = "lmda"
        elif args.method_name == "knn":
            method_obj = KNN(k=args.knn_neighbours)
            search_arg_name = "k"
            search_arg_vals = [1, 2, 3, 4, 5, 8, 10]
        else:
            raise ValueError(f"Unknown method: {args.method_name}")

        # Cross validation (MS1)
        if args.use_cross_validation:
            print("Using cross validation!\n")
            best_arg, best_val_acc = cross_validation(method_obj=method_obj, search_arg_name=search_arg_name, search_arg_vals=search_arg_vals, data=train_data, labels=train_labels, k_fold=4)
            # Give the classifier/regressor the best hyperparameter found via cross validation.
            method_obj.set_arguments(best_arg)

        # Fit and predict
        print("FIT & PREDICT phase ...")
        method_obj.fit(train_data, train_labels)
        pred_labels = method_obj.predict(test_data)

        # Report test results
        if method_obj.task_kind == "regression":
            loss = mse_fn(pred_labels, test_regression_target)
            print("Final MSE loss is", loss)
        else:
            acc = accuracy_fn(pred_labels, test_labels)
            print("Final classification accuracy is", acc)
            macrof1 = macrof1_fn(pred_labels, test_labels)
            print("Final macro F1 score is", macrof1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default="h36m", type=str, help="choose between h36m, movies, and music")
    parser.add_argument('--path_to_data', default="..", type=str, help="path to the data; if it is in the parent folder, you can use '..'")
    parser.add_argument('--method_name', default="knn", type=str, help="knn / logistic_regression / ridge_regression / dummy_classifier / dummy_regressor / nn")
    parser.add_argument('--knn_neighbours', default=3, type=int, help="number of kNN neighbours")
    parser.add_argument('--lr', type=float, default=1e-4, help="learning rate for methods that use one")
    parser.add_argument('--ridge_regression_lmda', type=float, default=150, help="lambda for ridge regression")
    parser.add_argument('--max_iters', type=int, default=500, help="max iterations for iterative methods")
    parser.add_argument('--use_cross_validation', action="store_true", help="enable cross validation")
    # Feel free to add more arguments here if you need them.

    # MS2 arguments
    parser.add_argument('--use_pca', action="store_true", help="enable PCA")

    args = parser.parse_args()
    main(args)
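
# Example invocations (illustrative only; all flags are defined above, and the
# default --path_to_data assumes the data sits in the parent folder):
#   python main.py --dataset music --method_name knn --knn_neighbours 5 --use_cross_validation
#   python main.py --dataset h36m --method_name ridge_regression --ridge_regression_lmda 150
#   python main.py --dataset movies --method_name nn --lr 1e-4 --max_iters 20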