-
Notifications
You must be signed in to change notification settings - Fork 3
/
BERT_testing_linux.fsx
182 lines (144 loc) · 6.53 KB
/
BERT_testing_linux.fsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
//#if WIN
#I @"C:/Users/moloneymb/.nuget/packages/"
//#endif
#if LINUX
#I @"/home/moloneymb/.nuget/packages/"
#endif
#r @"system.runtime.compilerservices.unsafe/4.5.2/lib/netstandard2.0/System.Runtime.CompilerServices.Unsafe.dll"
#r @"numsharp/0.20.5/lib/netstandard2.0/NumSharp.Core.dll"
#r @"tensorflow.net/0.14.0/lib/netstandard2.0/TensorFlow.NET.dll"
#r @"system.memory/4.5.3/lib/netstandard2.0/System.Memory.dll"
#r @"google.protobuf/3.10.1/lib/netstandard2.0/Google.Protobuf.dll"
#r @"argu/6.0.0/lib/netstandard2.0/Argu.dll"
#r @"csvhelper/12.2.3/lib/net47/CsvHelper.dll"
#r @"newtonsoft.json/12.0.2/lib/net45/Newtonsoft.Json.dll"
#r @"sharpziplib/1.2.0/lib/net45/ICSharpCode.SharpZipLib.dll"
#r "System.IO.Compression"
#load @"BertInFSharp/common.fs"
#load @"BertInFSharp/utils.fs"
#load @"BertInFSharp/tokenization.fs"
#load @"BertInFSharp/run_classifier.fs"
#load @"BertInFSharp/modeling.fs"
#load @"BertInFSharp/optimization.fs"
Common.setup()
Utils.setup()
#time "on"
open Tokenization
open System
open System.IO
open Newtonsoft.Json.Linq
open Modeling
open Tensorflow.Operations.Activation
open Modeling.Activation
open NumSharp
open Tensorflow
open System.Collections.Generic
open RunClassifier
// Fixes bug on GPU with linux
//let config = ConfigProto()
//let gpuOptions = GPUOptions()
//gpuOptions.AllowGrowth <- true
//config.GpuOptions <- gpuOptions
// Handle to the TensorFlow.NET `tf` API object used throughout the script.
let tf = Tensorflow.Binding.tf
// Flag forwarded to the tokenizer's `do_lower_case` argument.
let do_lower_case = true
// Tokenizer built from the vocabulary file configured in Common.
let tokenizer =
    Tokenization.FullTokenizer(
        vocab_file = Common.vocab_file,
        do_lower_case = do_lower_case)
// Model configuration parsed from the JSON text stored at Common.bert_config_file.
let bert_config =
    let config_json = File.ReadAllText(Common.bert_config_file)
    BertConfig.from_json_string(config_json)
// Compute train and warmup steps from batch size
// These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
let BATCH_SIZE = 2
let NUM_LABELS = 2
let LEARNING_RATE = 2e-5f
let MAX_SEQ_LENGTH = 128
let NUM_TRAIN_EPOCHS = 3.0f
// Warmup is a period of time where the learning rate
// is small and gradually increases--usually helps training.
// This is a fraction of the total number of training steps (it is multiplied
// by num_train_steps to obtain num_warmup_steps), so it must lie in [0, 1].
// It was previously 3.0f, which made the warmup three times longer than the
// whole training run; 0.1f is the value used by the reference BERT setup.
let WARMUP_PROPORTION = 0.1f
// Model configs
let SAVE_CHECKPOINTS_STEPS = 500
let SAVE_SUMMARY_STEPS = 100
// Every line of the vocabulary file, in file order.
let vocab = File.ReadAllLines(Common.vocab_file)

/// Loads the "train" and "test" splits found under `Common.data/aclImdb`.
/// For each split, at most `limit` files are taken from the "pos" directory
/// (label "1") and at most `limit` from the "neg" directory (label "0"); the
/// combined examples are shuffled and handed to `convert_examples_to_features`.
/// Returns the pair (train features, test features).
let getTrainTest limit =
    // Lookup from vocabulary line to its zero-based line index.
    let vocab_map =
        vocab
        |> Array.mapi (fun index token -> (token, index))
        |> Map.ofArray
    // Reads up to `limit` review files for one (split, sentiment) directory.
    // Async.mapiChunkBySize is a project helper; presumably it processes the
    // files in chunks of 200 -- TODO confirm against Utils.
    let loadReviews split sentiment label =
        let reviewDir = Path.Combine(Common.data, "aclImdb", split, sentiment)
        Directory.GetFiles(reviewDir)
        |> Array.truncate limit
        |> Async.mapiChunkBySize 200 (fun _ path ->
            InputExample(text_a = File.ReadAllText(path), label = string label) :> IExample)
    // Positive examples first, then negative ones, then shuffled together.
    let buildFeatures split =
        let examples =
            [| yield! loadReviews split "pos" 1
               yield! loadReviews split "neg" 0 |]
            |> Array.shuffle
        convert_examples_to_features(examples, vocab_map, MAX_SEQ_LENGTH, tokenizer :> Tokenization.ITokenizer)
    let trainFeatures = buildFeatures "train"
    let testFeatures = buildFeatures "test"
    (trainFeatures, testFeatures)

// Up to 2500 positive and 2500 negative reviews per split.
let train,test = getTrainTest 2500
// Graph inputs. input_ids and input_mask are int32 placeholders of fixed shape
// [BATCH_SIZE; MAX_SEQ_LENGTH]; labels is an int32 placeholder of shape [BATCH_SIZE].
let input_ids = tf.placeholder(tf.int32,TensorShape([|BATCH_SIZE; MAX_SEQ_LENGTH|]))
let input_mask = tf.placeholder(tf.int32,TensorShape([|BATCH_SIZE; MAX_SEQ_LENGTH|]))
let labels = tf.placeholder(tf.int32,TensorShape([|BATCH_SIZE|]))
// Build the BERT graph on top of the placeholders.
// NOTE(review): the positional `false` is presumably the is_training flag -- confirm against Modeling.BertModel.
let bertModel = BertModel(bert_config, false, input_ids = input_ids, input_mask = input_mask)
// Snapshot of the operations in the default graph at this point (not referenced again in this script).
let ops = tf.get_default_graph().get_operations()
// create the restore op before the other ops
let restore = tf.restore(Common.bert_chkpt)
// Use "pooled_output" for classification tasks on an entire sentence.
// Use "sequence_outputs" for token-level output.
let output_layer = bertModel.PooledOutput
// Size of the last dimension of the pooled output.
let hidden_size = Seq.last output_layer.shape
// Classifier weights of shape [hidden_size; NUM_LABELS], drawn from a
// truncated normal distribution with standard deviation 0.02.
let output_weights =
    tf.get_variable(
        "output_weights",
        TensorShape([|hidden_size; NUM_LABELS|]),
        initializer = tf.truncated_normal_initializer(stddev = 0.02f))
// Classifier bias with NUM_LABELS entries, initialised to zero.
let output_bias =
    tf.get_variable(
        "output_bias",
        TensorShape(NUM_LABELS),
        initializer = tf.zeros_initializer)
// Classification head built inside the "loss" variable scope.
// Produces, in order: the mean loss over the batch, the predicted label per
// example (argmax over the last axis), and the log-probabilities.
let (loss, predicted_labels, log_probs) =
    use _loss = vs.variable_scope("loss")
    // Dropout helps prevent overfitting
    let dropped_output = tf.nn.dropout(output_layer, keep_prob = tf.constant(0.9f))
    // trained in transpose
    let raw_logits = tf.matmul(dropped_output, output_weights._AsTensor())
    let biased_logits = tf.nn.bias_add(raw_logits, output_bias)
    // NOTE(review): log(softmax(x)) is less numerically stable than a fused
    // log-softmax; consider switching if the TensorFlow.NET version in use offers one.
    let log_probabilities = tf.log(tf.nn.softmax(biased_logits, axis = -1))
    // Convert Labels into one-hot encoding
    let one_hot_labels = tf.one_hot(labels, depth = NUM_LABELS, dtype = tf.float32)
    let predictions = tf.squeeze(tf.argmax(log_probabilities, axis = -1, output_type = tf.int32))
    // If we're predicting, we want predicted labels and the probabilities.
    //if is_predicting:
    // return (predicted_labels, log_probs)
    // If we're train/eval, compute loss between predicted and actual label
    let per_example_loss = -tf.reduce_sum(one_hot_labels * log_probabilities, axis = Nullable(-1))
    let mean_loss = tf.reduce_mean(per_example_loss)
    (mean_loss, predictions, log_probabilities)
// Total optimizer steps: (number of training examples / batch size) * epochs,
// truncated to an integer.
let num_train_steps =
    float32 train.Length / float32 BATCH_SIZE * NUM_TRAIN_EPOCHS |> int
// Warmup steps as a fraction of the total, truncated to an integer. Only the
// commented-out create_optimizer call below consumes this value.
let num_warmup_steps =
    float32 num_train_steps * WARMUP_PROPORTION |> int
// Train steps should be 400
// This should be ~43
//num_train_steps
//1404
//let train_op = Optimization.create_optimizer(loss, LEARNING_RATE, num_train_steps, Some(num_warmup_steps))
// Warmup is currently switched off: None is passed instead of Some(num_warmup_steps).
let train_op = Optimization.create_optimizer(loss, LEARNING_RATE, num_train_steps, None)
// Open a session, run the global variable initializer, then run the restore op.
// NOTE(review): restore runs after init, presumably so checkpoint values replace
// the freshly initialised ones -- confirm against tf.restore in Utils/Common.
let sess = tf.Session()
let init = tf.global_variables_initializer()
sess.run(init)
sess.run(restore) // load weights
// Written to the debug listeners only, not to standard output.
System.Diagnostics.Debug.WriteLine(sprintf "Training with batch size %i" BATCH_SIZE)
// Fetched on every training step: index 0 is the optimizer op, index 1 the loss tensor.
let fetchOps =
    [| train_op :> ITensorOrOperation; loss :> ITensorOrOperation |]
//let res = sess.run(fetchOps, [|FeedItem(input_ids,t1); FeedItem(input_mask,t2); FeedItem(labels,t3)|])
// Runs the training loop and collects the loss value fetched at each step.
// The range 0..400 is inclusive, so 401 steps are executed.
let xs =
    // Maps the stored label_id to a class index: 1014 -> 0, 1015 -> 1, anything
    // else raises. NOTE(review): getTrainTest creates labels as the strings
    // "0"/"1" and passes vocab_map to convert_examples_to_features, so these are
    // presumably the vocabulary ids of the tokens "0" and "1" -- TODO confirm.
    let toClassIndex label_id =
        match label_id with
        | 1014 -> 0
        | 1015 -> 1
        | _ -> failwith "err"
    [|
        for step in 0..400 do
            // Draw BATCH_SIZE examples from the training features.
            let batch = train |> Array.subSample BATCH_SIZE
            let idsBatch = NDArray(batch |> Array.map (fun feature -> feature.input_ids))
            let maskBatch = NDArray(batch |> Array.map (fun feature -> feature.input_mask))
            let labelBatch = NDArray(batch |> Array.map (fun feature -> toClassIndex feature.label_id))
            let feeds = [|FeedItem(input_ids, idsBatch); FeedItem(input_mask, maskBatch); FeedItem(labels, labelBatch)|]
            let results = sess.run(fetchOps, feeds)
            // results.[1] corresponds to `loss` in fetchOps; take its first float32 element.
            let stepLoss = results.[1].Data<float32>().[0]
            printfn "%i %f" step stepLoss
            yield stepLoss
    |]
//let ops = tf.get_default_graph().get_operations()
//open System.IO
//File.WriteAllLines(@"C:\EE\ops.txt",ops |> Array.map (fun x -> x.name))
//let ndres = NDArray(res)
//np.save("train.npy", ndres)
// Execution runtime is 156
//156.0/40.0
//res.[2].Data<int32>()
//t3.Data<int32>()