import numpy as np
import tensorflow as tf
import tensorflow.python.platform
from tensorflow.models.rnn import rnn
from tensorflow.models.rnn import rnn_cell
from bi_rnn import bi_rnn
from utils import *
###############################################
# NN creation functions #
###############################################
class Parameters:
    # container for the model's embeddings and layer weights
    def __init__(self, init=None, emb=None, w_c=False, b_c=False, w_p=False,
                 b_p=False, w_po=False, b_po=False):
        # None defaults avoid sharing mutable dicts across instances
        self.init_dic = init if init is not None else {}
        self.embeddings = emb if emb is not None else {}
        self.W_conv = w_c
        self.b_conv = b_c
        self.W_pred = w_p
        self.b_pred = b_p
        self.W_pot = w_po
        self.b_pot = b_po
def device_for_node(n):
if n.type == "MatMul":
return "/gpu:0"
else:
return "/cpu:0"
def conv2d(x, W):
    # stride-1 convolution with zero padding that preserves the input shape
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def weight_variable(shape, name='weight'):
    # truncated-normal initialization to break symmetry
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name + '_W')
def bias_variable(shape, name='bias'):
    # small positive bias keeps ReLU units initially active
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name + '_b')
def feature_layer(in_layer, config, params, reuse=False):
in_features = config.input_features
features_dim = config.features_dim
batch_size = config.batch_size
num_steps = config.num_steps
feature_mappings = config.feature_maps
# inputs
num_features = len(in_features)
input_ids = in_layer
if reuse:
tf.get_variable_scope().reuse_variables()
param_vars = params.embeddings
    else:
param_dic = params.init_dic
param_vars = {}
for feat in in_features:
if feat in param_dic:
param_vars[feat] = \
tf.Variable(tf.convert_to_tensor(param_dic[feat],
dtype=tf.float32),
name=feat + '_embedding',
trainable=False)
else:
shape = [len(feature_mappings[feat]['reverse']), features_dim]
initial = tf.truncated_normal(shape, stddev=0.1)
param_vars[feat] = tf.Variable(initial,
name=feat + '_embedding')
    # lookup layer: embedding_lookup with a list of tables partitions the ids
    # across the tables; the per-feature embeddings are summed below
    embedding_tables = [param_vars[feat] for feat in in_features]
    input_embeddings = tf.nn.embedding_lookup(embedding_tables, input_ids,
                                              name='lookup')
# add and return
embedding_layer = tf.reduce_sum(input_embeddings, 2)
return (embedding_layer, param_vars)
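# Shape summary for feature_layer (inferred from the input_ids placeholder in
# SequNN below):
#   in_layer -- int32 ids, [batch_size, num_steps, num_features]
#   returns  -- float32 embeddings, [batch_size, num_steps, features_dim]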
def bi_lstm_layer(in_layer, config, reuse=False, name='Bi_LSTM'):
num_units = config.rnn_hidden_units
output_size = config.rnn_output_size
batch_size = int(in_layer.get_shape()[0])
num_steps = int(in_layer.get_shape()[1])
input_size = int(in_layer.get_shape()[2])
initializer = tf.random_uniform_initializer(-0.1, 0.1)
lstm_cell_f = rnn_cell.LSTMCell(num_units, input_size, use_peepholes=True,
num_proj=output_size, cell_clip=1.0,
initializer=initializer)
lstm_cell_b = rnn_cell.LSTMCell(num_units, input_size, use_peepholes=True,
num_proj=output_size, cell_clip=1.0,
initializer=initializer)
initial_state_f = lstm_cell_f.zero_state(batch_size, tf.float32)
inputs_list = [tf.reshape(x, [batch_size, input_size])
for x in tf.split(1, num_steps, in_layer)]
rnn_out, rnn_states = bi_rnn(lstm_cell_f, lstm_cell_b, inputs_list,
initial_state=initial_state_f, scope=name,
reuse=reuse)
    # pack the per-step outputs into [num_steps, batch_size, out] and
    # transpose to [batch_size, num_steps, out]
    out_layer = tf.transpose(tf.pack(rnn_out), perm=[1, 0, 2])
return out_layer
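# Shape summary for bi_lstm_layer:
#   in_layer -- [batch_size, num_steps, input_size]
#   returns  -- [batch_size, num_steps, out], where out depends on the custom
#   bi_rnn helper (typically the forward and backward projections
#   concatenated, i.e. 2 * rnn_output_size)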
def convo_layer(in_layer, config, params, reuse=False, name='Convo'):
conv_window = config.conv_window
output_size = config.conv_dim
batch_size = int(in_layer.get_shape()[0])
num_steps = int(in_layer.get_shape()[1])
input_size = int(in_layer.get_shape()[2])
if reuse:
tf.get_variable_scope().reuse_variables()
W_conv = params.W_conv
b_conv = params.b_conv
else:
W_conv = weight_variable([conv_window, 1, input_size, output_size],
name=name)
b_conv = bias_variable([output_size], name=name)
    # treat the sequence as a (num_steps x 1) image so the convolution slides
    # a window of conv_window steps over time
    reshaped = tf.reshape(in_layer, [batch_size, num_steps, 1, input_size])
    conv_layer = tf.nn.relu(tf.reshape(conv2d(reshaped, W_conv),
                                       [batch_size, num_steps, output_size],
                                       name=name) + b_conv)
return (conv_layer, W_conv, b_conv)
def predict_layer(in_layer, config, params, reuse=False, name='Predict'):
n_outcomes = config.n_outcomes
batch_size = int(in_layer.get_shape()[0])
num_steps = int(in_layer.get_shape()[1])
input_size = int(in_layer.get_shape()[2])
if reuse:
tf.get_variable_scope().reuse_variables()
W_pred = params.W_pred
b_pred = params.b_pred
else:
W_pred = weight_variable([input_size, n_outcomes], name=name)
b_pred = bias_variable([n_outcomes], name=name)
flat_input = tf.reshape(in_layer, [-1, input_size])
pre_scores = tf.nn.softmax(tf.matmul(flat_input, W_pred) + b_pred)
preds_layer = tf.reshape(pre_scores, [batch_size, num_steps, -1])
return (preds_layer, W_pred, b_pred)
def optim_outputs(outcome, targets, config, params):
batch_size = int(outcome.get_shape()[0])
num_steps = int(outcome.get_shape()[1])
n_outputs = int(outcome.get_shape()[2])
    # cross entropy training criterion, plus L1 regularization on the
    # embeddings listed in config.l1_list
    criterion = -tf.reduce_sum(targets * tf.log(outcome))
    for feat in config.l1_list:
        criterion += config.l1_reg * \
            tf.reduce_sum(tf.abs(params.embeddings[feat]))
    # per-tag accuracy: multiplying by tf.reduce_sum(targets, 2) masks out
    # padded positions, whose one-hot target rows are all zeros
    correct_prediction = tf.equal(tf.argmax(outcome, 2), tf.argmax(targets, 2))
    accuracy = tf.reduce_sum(tf.cast(correct_prediction, "float") *
                             tf.reduce_sum(targets, 2)) / \
        tf.reduce_sum(targets)
return (criterion, accuracy)
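# Worked example of the masking above: with num_steps = 2 and a batch row
# whose second position is padding (targets [[0, 1], [0, 0]]), the mask
# tf.reduce_sum(targets, 2) = [1, 0] drops the padded position, so a correct
# prediction at position 0 yields accuracy 1/1 rather than 1/2.
# Caveat: predict_layer already applies a softmax, so tf.log(outcome) can
# underflow for near-zero probabilities; a more stable variant would feed
# logits to tf.nn.softmax_cross_entropy_with_logits instead.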
class SequNN:
def __init__(self, config):
self.batch_size = config.batch_size
self.num_steps = config.num_steps
num_features = len(config.input_features)
# input_ids <- batch.features
self.input_ids = tf.placeholder(tf.int32, shape=[self.batch_size,
self.num_steps,
num_features])
# targets <- batch.tag_windows_one_hot
self.targets = tf.placeholder(tf.float32, shape=[self.batch_size,
self.num_steps,
config.n_outcomes])
def make(self, config, params, reuse=False, name='SequNN'):
with tf.variable_scope(name):
if reuse:
tf.get_variable_scope().reuse_variables()
(out_layer, embeddings) = feature_layer(self.input_ids, config,
params, reuse=reuse)
params.embeddings = embeddings
if config.verbose:
print('features layer done')
if config.use_rnn:
                out_layer = bi_lstm_layer(out_layer, config, reuse=reuse)
if config.verbose:
print('rnn layer done')
if config.use_convo:
(out_layer, W_conv, b_conv) = convo_layer(out_layer, config,
params, reuse=reuse)
params.W_conv = W_conv
params.b_conv = b_conv
if config.verbose:
print('convolution layer done')
self.out_layer = out_layer
(preds_layer, W_pred, b_pred) = predict_layer(out_layer, config,
params, reuse=reuse)
params.W_pred = W_pred
params.b_pred = b_pred
self.preds_layer = preds_layer
(criterion, accuracy) = optim_outputs(preds_layer, config, params)
if config.verbose:
print('output layer done')
self.criterion = criterion
self.accuracy = accuracy
    def train_epoch(self, data, config, params):
        batch_size = config.batch_size
        # Adagrad update op minimizing the model's training criterion
        train_step = tf.train.AdagradOptimizer(
            config.learning_rate).minimize(self.criterion)
        batch = Batch()
        for i in range(len(data) // batch_size):
batch.read(data, i * batch_size, config)
f_dict = {self.input_ids: batch.features,
self.targets: batch.tag_windows_one_hot}
if i % 100 == 0:
train_accuracy = self.accuracy.eval(feed_dict=f_dict)
print("step %d of %d, training accuracy %f, Lemma_l1 %f" %
(i, len(data) / batch_size, train_accuracy,
tf.reduce_sum(tf.abs(params.embeddings['lemma'])).eval()))
train_step.run(feed_dict=f_dict)
def validate_accuracy(self, data, config):
batch_size = config.batch_size
batch = Batch()
total_accuracy = 0.
total = 0.
        for i in range(len(data) // batch_size):
batch.read(data, i * batch_size, config)
f_dict = {self.input_ids: batch.features,
self.targets: batch.tag_windows_one_hot}
dev_accuracy = self.accuracy.eval(feed_dict=f_dict)
total_accuracy += dev_accuracy
total += 1
if i % 100 == 0:
print("%d of %d: \t:%f" % (i, len(data) / batch_size,
total_accuracy / total))
return total_accuracy / total
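# End-to-end usage sketch (hypothetical: Config and the data loading step are
# not defined in this file, and the names below are assumptions based on the
# attributes read above):
#
#   config = Config()                # batch_size, num_steps, n_outcomes, ...
#   params = Parameters()
#   model = SequNN(config)
#   model.make(config, params)
#   with tf.Session() as sess:
#       sess.run(tf.initialize_all_variables())
#       for epoch in range(config.num_epochs):
#           model.train_epoch(train_data, config, params)
#           dev_acc = model.validate_accuracy(dev_data, config)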