-
Notifications
You must be signed in to change notification settings - Fork 150
/
Copy pathann_tf.py
120 lines (96 loc) · 3.87 KB
/
ann_tf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from util import getData, getBinaryData, y2indicator, error_rate, init_weight_and_bias
from sklearn.utils import shuffle
class HiddenLayer(object):
    """One fully-connected layer with a ReLU nonlinearity.

    Holds TensorFlow variables for the weight matrix (M1 x M2) and bias
    vector (M2,), initialized by util.init_weight_and_bias.
    """
    def __init__(self, M1, M2, an_id):
        # Bookkeeping: layer id and input/output sizes.
        self.id = an_id
        self.M1 = M1
        self.M2 = M2
        # Create numpy initial values first, then wrap them as TF variables
        # in float32 (TF's default compute dtype).
        W0, b0 = init_weight_and_bias(M1, M2)
        self.W = tf.Variable(W0.astype(np.float32))
        self.b = tf.Variable(b0.astype(np.float32))
        # Expose trainable variables so the owning model can collect them
        # (e.g. for L2 regularization).
        self.params = [self.W, self.b]

    def forward(self, X):
        """Apply the affine transform then ReLU: relu(X W + b)."""
        return tf.nn.relu(tf.matmul(X, self.W) + self.b)
class ANN(object):
    """Fully-connected feedforward classifier trained with RMSProp (TF 1.x graph API).

    The architecture is a stack of ReLU HiddenLayers followed by a linear
    output layer; softmax is applied implicitly inside the loss and argmax
    is used for prediction.
    """
    def __init__(self, hidden_layer_sizes):
        # Sizes of the hidden layers, in order from input to output,
        # e.g. [2000, 1000, 500].
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False):
        """Build the TF graph and train with mini-batch RMSProp.

        X, Y:            training inputs and integer class labels.
        Xvalid, Yvalid:  held-out set used for the printed cost/error.
        learning_rate, mu, decay: RMSProp hyperparameters (mu = momentum).
        reg:             L2 penalty coefficient applied to all weights/biases.
        epochs, batch_sz: training duration and mini-batch size.
        show_fig:        if True, plot the recorded validation costs at the end.

        NOTE(review): all TF state lives in the Session created here; after
        fit() returns the trained variable values are discarded, so predict()
        is only usable on graph tensors inside this method.
        """
        K = len(set(Y))  # number of classes; computed now b/c Y is turned into an indicator matrix below
        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        # One-hot encode targets for the softmax cross-entropy loss.
        # Assumes y2indicator maps integer labels to an (N, K) 0/1 matrix — from util, TODO confirm.
        Y = y2indicator(Y).astype(np.float32)
        # for calculating error rate: keep the original integer labels around,
        # since error_rate compares them against argmax predictions
        Yvalid_flat = Yvalid
        Yvalid = y2indicator(Yvalid).astype(np.float32)
        # initialize hidden layers
        N, D = X.shape
        self.hidden_layers = []
        M1 = D  # input size of the first hidden layer is the data dimensionality
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2  # next layer's input size is this layer's output size
            count += 1
        # Final linear (logits) layer: last hidden size -> K classes.
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))
        # collect params for later use (L2 regularization below)
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params
        # set up theano functions and variables
        # (comment kept from the Theano version this file was ported from;
        # these are TF placeholders fed per batch)
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)  # logits (no softmax applied here)
        # L2 penalty over every weight and bias in the network.
        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        # Mean softmax cross-entropy over the batch, plus regularization.
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=act,
                labels=tfT
            )
        ) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)
        # NOTE(review): N // batch_sz drops the final partial batch each epoch.
        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                # Reshuffle each epoch so batches differ between epochs.
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]
                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})
                    # Periodically evaluate cost/error on the full validation set.
                    if j % 20 == 0:
                        c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        costs.append(c)
                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)
        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return the logits tensor: pass X through each hidden layer, then the linear output layer."""
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b

    def predict(self, X):
        """Return the predicted class index per row (argmax over logits)."""
        act = self.forward(X)
        return tf.argmax(act, 1)
def main():
    """Load the dataset, build a 2000-1000-500 hidden-layer ANN, and train it."""
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    ann = ANN([2000, 1000, 500])
    ann.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)


if __name__ == '__main__':
    main()