train_sighan.py
import os

import numpy as np
import tensorflow as tf

import Sighan_model
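

# Alternating multi-task training on the SIGHAN data: each outer step feeds
# one NER batch and one CWS (Chinese word segmentation) batch to the shared
# TransferModel; adv=True presumably enables the adversarial task
# discriminator that consumes the task_label one-hot vectors built below.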
def main(_):
    print('read word embedding......')
    embedding = np.load('./data/vector.npy')
    print('read ner train data......')
    train_word = np.load('./data/train_word.npy')
    train_label = np.load('./data/train_label.npy')
    train_length = np.load('./data/train_length.npy')
    print('read cws train data......')
    train_cws_word = np.load('./data/cws_word.npy')
    train_cws_label = np.load('./data/cws_label.npy')
    train_cws_length = np.load('./data/cws_length.npy')

    setting = Sighan_model.Setting()
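
    # One-hot task indicators for every example in a batch:
    # [1, 0] marks a NER batch, [0, 1] marks a CWS batch.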
    task_ner = []
    task_cws = []
    for i in range(setting.batch_size):
        task_ner.append([1, 0])
        task_cws.append([0, 1])
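
    # allow_soft_placement lets ops without a GPU kernel fall back to the CPU;
    # allow_growth keeps TensorFlow from reserving all GPU memory up front.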
    with tf.Graph().as_default():
        # use GPU 3 only
        os.environ["CUDA_VISIBLE_DEVICES"] = "3"
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        with sess.as_default():
            initializer = tf.contrib.layers.xavier_initializer()
            with tf.variable_scope('ner_model', reuse=None, initializer=initializer):
                m = Sighan_model.TransferModel(setting, tf.cast(embedding, tf.float32), adv=True, is_train=True)
                m.multi_task()
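
            # Two step counters and two train ops, one per task; both minimize
            # the model's combined loss m.loss.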
            global_step = tf.Variable(0, name="global_step", trainable=False)
            global_step1 = tf.Variable(0, name="global_step1", trainable=False)
            optimizer = tf.train.AdamOptimizer(0.001)
            if setting.clip > 0:
                grads, vs = zip(*optimizer.compute_gradients(m.loss))
                grads, _ = tf.clip_by_global_norm(grads, clip_norm=setting.clip)
                train_op = optimizer.apply_gradients(zip(grads, vs), global_step)
                train_op1 = optimizer.apply_gradients(zip(grads, vs), global_step1)
            else:
                train_op = optimizer.minimize(m.loss, global_step)
                train_op1 = optimizer.minimize(m.loss, global_step1)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=None)
            save_path = './ckpt/lstm+crf-sighan'
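
            # Steps per epoch are set by the NER data; the same batch index
            # also slices the shuffled CWS order, which assumes the CWS set is
            # at least as large as the NER set.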
            for one_epoch in range(setting.num_epoches):
                temp_order = list(range(len(train_word)))
                temp_order_cws = list(range(len(train_cws_word)))
                np.random.shuffle(temp_order)
                np.random.shuffle(temp_order_cws)
                for i in range(len(temp_order) // setting.batch_size):
                    for j in range(2):
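                        # j == 0: one optimizer step on NER; j == 1: one on CWS.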
                        if j == 0:
                            # NER step: real NER labels feed m.label, while CWS
                            # labels from the same batch index fill m.label_,
                            # apparently so both label placeholders are always fed.
                            temp_word = []
                            temp_label = []
                            temp_label_ = []
                            temp_length = []
                            temp_input_index = temp_order[i * setting.batch_size:(i + 1) * setting.batch_size]
                            temp_input_index1 = temp_order_cws[i * setting.batch_size:(i + 1) * setting.batch_size]
                            for k in range(len(temp_input_index)):
                                temp_word.append(train_word[temp_input_index[k]])
                                temp_label.append(train_label[temp_input_index[k]])
                                temp_label_.append(train_cws_label[temp_input_index1[k]])
                                temp_length.append(train_length[temp_input_index[k]])
                            feed_dict = {}
                            feed_dict[m.input] = np.asarray(temp_word)
                            feed_dict[m.label] = np.asarray(temp_label)
                            feed_dict[m.label_] = np.asarray(temp_label_)
                            feed_dict[m.sent_len] = np.asarray(temp_length)
                            feed_dict[m.is_ner] = 1
                            feed_dict[m.task_label] = np.asarray(task_ner)
                            _, step, loss = sess.run([train_op, global_step, m.ner_loss], feed_dict)
                            if step % 100 == 0:
                                temp = "step {}, loss {}".format(step, loss)
                                print(temp)
                            current_step = step
                            # if current_step % 500 == 0 and 30000 < current_step < 180000:
                            if current_step % 500 == 0:
                                saver.save(sess, save_path=save_path, global_step=current_step)
                        else:
                            # CWS step: real CWS labels feed m.label_, with NER
                            # labels as filler for m.label.
                            temp_cws_word = []
                            temp_cws_label = []
                            temp_cws_label_ = []
                            temp_cws_length = []
                            temp_input_index = temp_order_cws[i * setting.batch_size:(i + 1) * setting.batch_size]
                            temp_input_index1 = temp_order[i * setting.batch_size:(i + 1) * setting.batch_size]
                            for k in range(len(temp_input_index)):
                                temp_cws_word.append(train_cws_word[temp_input_index[k]])
                                temp_cws_label.append(train_label[temp_input_index1[k]])
                                temp_cws_label_.append(train_cws_label[temp_input_index[k]])
                                temp_cws_length.append(train_cws_length[temp_input_index[k]])
                            feed_dict = {}
                            feed_dict[m.input] = np.asarray(temp_cws_word)
                            feed_dict[m.label] = np.asarray(temp_cws_label)
                            feed_dict[m.label_] = np.asarray(temp_cws_label_)
                            feed_dict[m.sent_len] = np.asarray(temp_cws_length)
                            feed_dict[m.is_ner] = 0
                            feed_dict[m.task_label] = np.asarray(task_cws)
                            _, step1, cws_loss = sess.run([train_op1, global_step1, m.cws_loss], feed_dict)
                            if step1 % 500 == 0:
                                tempstr = "step2 {}, cws_loss {}".format(step1, cws_loss)
                                print(tempstr)


if __name__ == "__main__":
    tf.app.run()
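
# Run directly once the .npy inputs above exist, e.g.:
#   python train_sighan.py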