-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathexample7.py
168 lines (131 loc) · 4.9 KB
/
example7.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
##############################################################
### Baseline + Cross-Entropy Loss + Momentum-based SGD     ###
##############################################################
import numpy as np
import mnist_loader
### Data Loading
# Materialize every split as a list: the training code in this file calls
# len() on the data, slices it, shuffles it in place and iterates over the
# test split once per epoch, none of which works on a one-shot iterator.
# NOTE(review): some Python 3 ports of mnist_loader return zip objects from
# load_data_wrapper() -- confirm against the local module. list() on an
# existing list is only a cheap shallow copy, so this is safe either way.
training_data, validation_data, test_data = (
    list(split) for split in mnist_loader.load_data_wrapper()
)
### Parameters
# Training schedule: number of epochs, step size and mini-batch size.
n_epoch, learning_rate, batch_size = 30, 0.1, 10

# Momentum-based SGD: `mu` is the momentum coefficient. Every velocity term
# starts out as the scalar 0 (ints are immutable, so sharing one is safe).
mu = 0.9
v_b2 = v_b3 = v_W2 = v_W3 = 0

### Network Architecture
# Layer widths: input, hidden, output.
n_node_input, n_node_hidden, n_node_output = 784, 100, 10

### Weight & Bias
# Standard-normal initial values, drawn in the order W2, b2, W3, b3.
W2 = np.random.standard_normal((n_node_hidden, n_node_input))
b2 = np.random.standard_normal((n_node_hidden, 1))
W3 = np.random.standard_normal((n_node_output, n_node_hidden))
b3 = np.random.standard_normal((n_node_output, 1))
### Activation Functions
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-z)))`` through
    ``np.logaddexp``. Unlike the direct formula, this never calls ``exp`` on
    a large positive number, so strongly negative inputs underflow quietly
    to 0 instead of overflowing.

    ``z`` may be a scalar or a NumPy array; the result has the same shape.
    """
    return np.exp(-np.logaddexp(0.0, -z))
def sigmoid_prime(z):
    """Return the derivative of the sigmoid function at ``z``, element-wise.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``. The
    sigmoid is evaluated once and reused for both factors.
    """
    s = sigmoid(z)
    return s * (1 - s)
### Training
def _load_cached_errors(file_name):
    """Load the per-epoch error rates written by a previous run.

    Returns a ``(training_errors, test_errors)`` pair of 1-D arrays, or
    ``None`` when either result file is missing or cannot be parsed. Both
    files are loaded before anything is returned, so a half-present cache
    never leaves the caller with one list and one array.
    """
    try:
        return (np.loadtxt(fname='tr_'+file_name, ndmin=1),
                np.loadtxt(fname='test_'+file_name, ndmin=1))
    except (OSError, ValueError):
        return None


def _feed_forward(x):
    """Propagate one input through the network using the current weights.

    ``x`` is assumed to be a column vector of shape (n_node_input, 1) --
    TODO confirm against mnist_loader. Returns the hidden pre-activation
    ``z2``, the hidden activation ``a2`` and the output activation ``a3``.
    """
    z2 = np.dot(W2, x) + b2
    a2 = sigmoid(z2)
    z3 = np.dot(W3, a2) + b3
    a3 = sigmoid(z3)
    return z2, a2, a3


test_errors = []
training_errors = []
n = len(training_data)
file_name_common = 'msgd_ce'+'_nHidden'+str(n_node_hidden)+'.txt'

cached_errors = _load_cached_errors(file_name_common)
if cached_errors is not None:
    # Results of an earlier run exist: reuse them instead of retraining.
    training_errors, test_errors = cached_errors
else:
    n_test = len(test_data)
    for j in range(n_epoch):
        ## Stochastic Gradient Descent
        np.random.shuffle(training_data)
        sum_of_training_error = 0
        # for each batch
        for k in range(0, n, batch_size):
            batch = training_data[k:k+batch_size]
            # gradients accumulated over the samples of this batch
            sum_gradient_b3 = 0
            sum_gradient_b2 = 0
            sum_gradient_W3 = 0
            sum_gradient_W2 = 0
            # for each sample
            for x, y in batch:
                ## Feed forward
                z2, a2, a3 = _feed_forward(x)
                ## Backpropagation
                # Step 1: Error at the output layer [Cross-Entropy Cost]
                delta_3 = a3 - y
                # Step 2: Error relationship between two adjacent layers
                delta_2 = sigmoid_prime(z2) * np.dot(W3.transpose(), delta_3)
                # Step 3: Gradient of C in terms of bias (equal to the deltas)
                sum_gradient_b3 += delta_3
                sum_gradient_b2 += delta_2
                # Step 4: Gradient of C in terms of weight
                sum_gradient_W3 += np.dot(delta_3, a2.transpose())
                sum_gradient_W2 += np.dot(delta_2, x.transpose())
                ## Training Error
                sum_of_training_error += int(np.argmax(a3) != np.argmax(y))
            # update weights & biases via momentum-based SGD; average over the
            # actual batch length so a short final batch is weighted correctly
            n_in_batch = len(batch)
            v_b3 = mu * v_b3 - learning_rate * sum_gradient_b3 / n_in_batch
            b3 += v_b3
            v_b2 = mu * v_b2 - learning_rate * sum_gradient_b2 / n_in_batch
            b2 += v_b2
            v_W3 = mu * v_W3 - learning_rate * sum_gradient_W3 / n_in_batch
            W3 += v_W3
            v_W2 = mu * v_W2 - learning_rate * sum_gradient_W2 / n_in_batch
            W2 += v_W2
        # Report Training Error
        print("[TRAIN_ERROR] Epoch %02d: %5d / %05d" % (j, sum_of_training_error, n))
        # built-in float: the np.float alias was removed in NumPy 1.24
        training_errors.append(float(sum_of_training_error) / n)

        ### Test
        sum_of_test_error = 0
        for x, y in test_data:
            ## Feed forward
            a3 = _feed_forward(x)[-1]
            ## Test Error
            # in test data, label info is a number not one-hot vector as in training data
            sum_of_test_error += int(np.argmax(a3) != y)
        # Report Test Error
        print("[ TEST_ERROR] Epoch %02d: %5d / %05d" % (j, sum_of_test_error, n_test))
        test_errors.append(float(sum_of_test_error) / n_test)

    ## Save Results
    np.savetxt('tr_'+file_name_common, np.array(training_errors), fmt='%.5f')
    np.savetxt('test_'+file_name_common, np.array(test_errors), fmt='%.5f')
### Plot results
import matplotlib.pyplot as plt

# Derive the x axis from the number of recorded epochs rather than n_epoch:
# results loaded from disk may come from a run with a different epoch count.
# atleast_1d also copes with a single-value result loaded as a 0-d array.
test_error_rates = np.atleast_1d(test_errors)
idx = np.arange(1, len(test_error_rates) + 1)
plt.plot(idx, test_error_rates * 100, 'ro-', label='Momentum SGD')

# Load baseline (plain SGD) test errors, if that experiment has been run.
baseline_file_name = 'test_' + 'ce' + '_nHidden' + str(n_node_hidden) + '.txt'
try:
    # Only the file load is guarded, so a plotting error is not misreported
    # as a missing baseline.
    baseline_errors = np.loadtxt(fname=baseline_file_name, ndmin=1)
except (OSError, ValueError):
    print('There is no result of baseline')
else:
    baseline_idx = np.arange(1, len(baseline_errors) + 1)
    plt.plot(baseline_idx, baseline_errors * 100, 'bo-', label='SGD')

plt.legend(loc='upper center', shadow=True)

# 'normal' is a font weight/style value, not a family name, so matplotlib
# could not resolve it; 'sans-serif' names the generic family explicitly.
# NOTE(review): the lines and legend above were created before this rc call;
# move it up if they were meant to pick up the bold font too.
font = {'family': 'sans-serif',
        'weight': 'bold',
        'size': 15}
plt.rc('font', **font)

plt.xlabel('Epoch', fontsize=22)
plt.ylabel('Test error rate [%]', fontsize=22)
plt.grid(True)
plt.show()