-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhomework2_ymtaye_yyadati_lgangaramaney.py
108 lines (85 loc) · 3.2 KB
/
homework2_ymtaye_yyadati_lgangaramaney.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
## Yared Taye
## Lokesh Gangaramaney
## Yash Yadati
import numpy as np
## Load the design matrices and targets from the parent directory.
# Every sample is flattened into one row of 48 * 48 values
# (presumably 48x48 images -- TODO confirm against the data files).
X_tr = np.load("../age_regression_Xtr.npy").reshape(-1, 48 * 48)
ytr = np.load("../age_regression_ytr.npy")
X_te = np.load("../age_regression_Xte.npy").reshape(-1, 48 * 48)
yte = np.load("../age_regression_yte.npy")
## Split Dataset: rows from index 4000 onward become the validation set,
## the first 4000 rows stay as the training set.
X_val, yval = X_tr[4000:, :], ytr[4000:]
X_tr, ytr = X_tr[:4000, :], ytr[:4000]
## Weight generation
def generate_weights(num_features=48 * 48):
    """Draw a random initial augmented weight vector.

    The vector holds ``num_features`` weights followed by one bias term, so
    its length is ``num_features + 1``. Entries are sampled independently
    from a normal distribution centred at 0.5.

    Args:
        num_features: Number of input features. Defaults to ``48 * 48``,
            the row width used by the rest of this script.

    Returns:
        1-D ``numpy.ndarray`` of shape ``(num_features + 1,)``.
    """
    # Standard deviation of the initial values: 0.01 ** 0.1 is roughly 0.63.
    # NOTE(review): if 0.01 was meant to be a variance, the exponent would be
    # 0.5 (sigma = 0.1) -- confirm the intent before changing it.
    sigma = 0.01 ** 0.1
    return sigma * np.random.randn(num_features + 1) + 0.5
## Linear prediction for any design matrix from an augmented weight vector
def predict(X, W_tilda):
    """Return the linear model output ``X . w + b``.

    Args:
        X: Design matrix with one sample per row.
        W_tilda: Augmented parameter vector; every entry except the last is
            a weight and the last entry is the bias.

    Returns:
        The predictions, one per row of ``X``.
    """
    weights, bias = W_tilda[:-1], W_tilda[-1]
    return X @ weights + bias
def fmse(y_predit, y_actual):
    """Return half of the mean squared error between two arrays.

    Args:
        y_predit: Predicted values.
        y_actual: Ground-truth values, broadcast-compatible with ``y_predit``.

    Returns:
        ``mean((y_predit - y_actual) ** 2) / 2`` as a scalar.
    """
    residual = y_predit - y_actual
    return np.mean(np.square(residual)) / 2
def gradient(X, y, weight, bias, alpha):
    """Compute the gradients of the regularised cost for one batch.

    Args:
        X: Batch design matrix with one sample per row.
        y: Targets for the batch.
        weight: Current weight vector.
        bias: Current bias term.
        alpha: L2 regularisation strength; only the weights are penalised.

    Returns:
        A ``(grad_weight, grad_bias)`` tuple: the data-term gradient averaged
        over the batch plus ``(alpha / n) * weight``, and the mean residual.
    """
    num_rows = X.shape[0]
    residual = X @ weight + bias - y
    # The transpose is a no-op for a 1-D residual; it is kept so that other
    # input shapes behave exactly as before.
    data_term = X.T @ residual.T / num_rows
    penalty_term = (alpha / num_rows) * weight
    return data_term + penalty_term, np.average(residual)
def minibatches(X, y, batchsize):
    """Yield shuffled ``(X_batch, y_batch)`` pairs of exactly ``batchsize`` rows.

    The row order is shuffled once per call using NumPy's global random
    state. Only full batches are produced: leftover rows that do not fill a
    complete batch are skipped, and nothing is yielded when ``batchsize``
    exceeds the number of rows.

    Args:
        X: Design matrix with one sample per row.
        y: Targets aligned with the rows of ``X``.
        batchsize: Number of rows per batch.

    Yields:
        Tuples ``(X[ids], y[ids])`` for consecutive slices of the shuffled
        row indices.
    """
    num_rows = X.shape[0]
    order = np.arange(num_rows)
    np.random.shuffle(order)
    # Exclusive upper bound on start positions that still leave a full batch.
    start_limit = num_rows - batchsize + 1
    for start in range(0, start_limit, batchsize):
        chosen = order[start:start + batchsize]
        yield X[chosen], y[chosen]
def SGD(X_tr, ytr, StepSize=0.001, alpha=0.01, batchsize=50, epochs=20):
    """Fit the linear model with mini-batch stochastic gradient descent.

    Starts from ``generate_weights()`` and, for each epoch, walks over the
    batches produced by ``minibatches``, stepping the weights and the bias
    against the gradients returned by ``gradient``.

    Args:
        X_tr: Training design matrix with one sample per row.
        ytr: Training targets.
        StepSize: Learning rate applied to both gradient components.
        alpha: L2 regularisation strength forwarded to ``gradient``.
        batchsize: Number of rows per mini-batch.
        epochs: Number of passes over the training data.

    Returns:
        Augmented parameter vector: the weights followed by the bias.
    """
    initial = generate_weights()
    weight, bias = initial[:-1], initial[-1]
    for _ in range(epochs):
        for X_batch, y_batch in minibatches(X_tr, ytr, batchsize):
            grad_weight, grad_bias = gradient(X_batch, y_batch, weight, bias, alpha)
            # Rebind instead of updating in place: ``weight`` starts out as a
            # view of ``initial``.
            weight = weight - StepSize * grad_weight
            bias = bias - StepSize * grad_bias
    return np.hstack((weight, bias))
def predictions(Train_X, Test_X, Train_y, Test_y, aug_w, StepSize, batches, ep, Wd):
    """Print the half-MSE of a fitted model on two data sets.

    The hyperparameters are printed before each cost so every report block
    is self-contained. The first block is always labelled "Training Set" and
    the second "Testing Set", whatever data the caller passes in.

    Args:
        Train_X: Design matrix reported under the "Training Set" label.
        Test_X: Design matrix reported under the "Testing Set" label.
        Train_y: Targets for ``Train_X``.
        Test_y: Targets for ``Test_X``.
        aug_w: Augmented parameter vector (weights followed by bias).
        StepSize: Learning rate, echoed in the report.
        batches: Batch size, echoed in the report.
        ep: Number of epochs, echoed in the report.
        Wd: L2 regularisation strength, echoed in the report.

    Returns:
        None; the report is written to standard output.
    """
    train_pred = predict(Train_X, aug_w)
    test_pred = predict(Test_X, aug_w)
    report = (
        ('fMSE for Training Set: ', fmse(train_pred, Train_y)),
        ('fMSE for Testing Set: ', fmse(test_pred, Test_y)),
    )
    separator = "*" * 50
    for cost_label, cost in report:
        print(separator)
        print('Current Learning Rate: ', StepSize)
        print('Batches: ', batches)
        print('Epochs', ep)
        print('L2-Reg', Wd)
        print(cost_label, cost)
    print(separator)
def grid_search():
    """Train and report on every combination of a fixed hyperparameter grid.

    Each model is trained on the module-level ``X_tr`` / ``ytr``. The
    validation split is handed to ``predictions`` in the ``Train_*`` slots,
    so its cost is printed under the "Training Set" label, next to the cost
    on the test set. Nothing is returned and no best setting is selected;
    the results are only printed.
    """
    ### Grid Search
    rate_options = (0.0001, 0.001, 0.005, 0.01)
    batch_options = (10, 50, 100, 200)
    decay_options = (0.02, 0.05, 0.07, 0.1)
    epoch_options = (5, 10, 50, 100)
    # Epochs vary slowest and the learning rate fastest.
    combos = (
        (ep, Wd, batches, lr)
        for ep in epoch_options
        for Wd in decay_options
        for batches in batch_options
        for lr in rate_options
    )
    for ep, Wd, batches, lr in combos:
        fitted = SGD(X_tr, ytr, alpha=Wd, StepSize=lr, batchsize=batches, epochs=ep)
        predictions(X_val, X_te, yval, yte, fitted, lr, batches, ep, Wd)
def summary():
    """Train once with the chosen hyperparameters and print the costs.

    The model is fitted on the module-level training split and then reported
    on that same training split and on the test set via ``predictions``.
    """
    ## USED HYPERPARAMETERS
    reg_strength = 0.02
    learning_rate = 0.001
    batch_size = 10
    epoch_count = 100
    fitted = SGD(
        X_tr,
        ytr,
        StepSize=learning_rate,
        alpha=reg_strength,
        batchsize=batch_size,
        epochs=epoch_count,
    )
    predictions(
        X_tr, X_te, ytr, yte, fitted,
        learning_rate, batch_size, epoch_count, reg_strength,
    )
## Script entry: train with the chosen hyperparameters and print the report.
## This runs whenever the module is executed or imported.
summary()