forked from leviswind/pytorch-transformer
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhyperparams.py
38 lines (31 loc) · 1.22 KB
/
hyperparams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# -*- coding: utf-8 -*-
#/usr/bin/python2
'''
June 2017 by kyubyong park.
kbpark.linguist@gmail.com.
https://www.github.com/kyubyong/transformer
'''
class Hyperparams:
    '''Hyperparameters, exposed as plain class attributes.

    Settings are read straight off the class (e.g. ``Hyperparams.batch_size``);
    there is no ``__init__`` and no per-instance state. They are grouped into
    data paths, training settings and model settings.
    '''

    # ---- data: relative paths to the corpus files -------------------------
    source_train = 'corpora/train.tags.de-en.de'
    target_train = 'corpora/train.tags.de-en.en'
    source_test = 'corpora/IWSLT16.TED.tst2014.de-en.de.xml'
    target_test = 'corpora/IWSLT16.TED.tst2014.de-en.en.xml'

    # ---- training ----------------------------------------------------------
    batch_size = 32  # alias = N
    # Learning rate. In the paper, the learning rate is adjusted to the
    # global step.
    lr = 0.0001
    logdir = 'logdir'  # log directory
    model_dir = './models/'  # directory where models are saved

    # ---- model -------------------------------------------------------------
    # Maximum number of words in a sentence. alias = T.
    # Feel free to increase this if you are ambitious.
    maxlen = 10
    # Words that occur fewer than min_cnt times are encoded as <UNK>.
    min_cnt = 20
    hidden_units = 512  # alias = C
    num_blocks = 6  # number of encoder/decoder blocks
    num_epochs = 20
    num_heads = 8
    dropout_rate = 0.1
    # If True, use sinusoid. If False, use positional embedding.
    sinusoid = False
    eval_epoch = 20  # epoch of the model used for eval
    # Epoch of the preloaded model for resuming training.
    # NOTE(review): None presumably means "do not preload" -- confirm
    # against the training script.
    preload = None