-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from h4iku/update-opennmt
Update OpenNMT-py scripts to work with v2
- Loading branch information
Showing
14 changed files
with
123 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
#
# Build the vocabulary for the simple-lstm model with OpenNMT-py's
# onmt_build_vocab, writing it to <output_dir>/final.vocab.
#
# Usage: <this script> [size] [type]
#   size  small (default) OR large
#   type  unique (default) OR repetition
#
# All paths start with ./ and are therefore resolved against the current
# working directory.

set -euo pipefail

size=${1:-small}  # Can be: small OR large
type=${2:-unique} # Can be: repetition OR unique
data_config=./scripts/simple-lstm/${type}_${size}_data.yaml
output_dir=./saved_models/simple-lstm/$type/$size

# mkdir -p already succeeds when the directory exists, so no -d test is needed.
mkdir -p -- "$output_dir"

onmt_build_vocab \
  -config "$data_config" \
  -src_seq_length 510 \
  -tgt_seq_length 510 \
  -src_vocab_size 64000 \
  -tgt_vocab_size 64000 \
  -share_vocab \
  -n_sample -1 \
  -save_data "$output_dir/final" \
  -src_vocab "$output_dir/final.vocab"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
#
# Preprocess the train/validation split for the legacy simple-lstm model with
# onmt_preprocess, saving the result under <output_dir>/final.
#
# Usage: <this script> [size] [type]
#   size  small (default) OR large
#   type  unique (default) OR repetition
#
# All paths start with ./ and are therefore resolved against the current
# working directory.

set -euo pipefail

size=${1:-small}  # Can be: small OR large
type=${2:-unique} # Can be: repetition OR unique
data_path=./data/$type/split/$size
output_dir=./saved_models/simple-lstm-legacy/$type/$size

# mkdir -p already succeeds when the directory exists, so no -d test is needed.
mkdir -p -- "$output_dir"

onmt_preprocess \
  -train_src "$data_path/src-train.txt" \
  -train_tgt "$data_path/tgt-train.txt" \
  -valid_src "$data_path/src-val.txt" \
  -valid_tgt "$data_path/tgt-val.txt" \
  --src_seq_length 510 \
  --tgt_seq_length 510 \
  --src_vocab_size 64000 \
  --tgt_vocab_size 64000 \
  -dynamic_dict \
  -share_vocab \
  --save_data "$output_dir/final"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash
#
# Translate the test set with the legacy simple-lstm model's step-20000
# checkpoint using onmt_translate (beam size 5, single best hypothesis).
# Predictions are written to <output_dir>/pred-test_beam5.txt.
#
# Usage: <this script> [size] [type]
#   size  small (default) OR large
#   type  unique (default) OR repetition
#
# All paths start with ./ and are therefore resolved against the current
# working directory.

set -euo pipefail

size=${1:-small}  # Can be: small OR large
type=${2:-unique} # Can be: repetition OR unique
data_path=./data/$type/split/$size
output_dir=./saved_models/simple-lstm-legacy/$type/$size

onmt_translate \
  -model "$output_dir/final-model_step_20000.pt" \
  -src "$data_path/src-test.txt" \
  -beam_size 5 \
  -n_best 1 \
  -output "$output_dir/pred-test_beam5.txt" \
  -dynamic_dict
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash
#
# Train the legacy simple-lstm model with onmt_train on four GPUs, reading the
# data saved under <output_dir>/final and writing checkpoints with the prefix
# <output_dir>/final-model.
#
# Usage: <this script> [size] [type]
#   size  small (default) OR large
#   type  unique (default) OR repetition
#
# All paths start with ./ and are therefore resolved against the current
# working directory.

set -euo pipefail

size=${1:-small}  # Can be: small OR large
type=${2:-unique} # Can be: repetition OR unique
output_dir=./saved_models/simple-lstm-legacy/$type/$size

# Expose the four devices that -world_size / -gpu_ranks below refer to.
export CUDA_VISIBLE_DEVICES=0,1,2,3

onmt_train \
  -data "$output_dir/final" \
  -world_size 4 \
  -gpu_ranks 0 1 2 3 \
  -encoder_type brnn \
  -enc_layers 2 \
  -decoder_type rnn \
  -dec_layers 2 \
  -rnn_size 256 \
  -global_attention general \
  -batch_size 32 \
  -word_vec_size 256 \
  -bridge \
  -copy_attn \
  -reuse_copy_attn \
  -train_steps 20000 \
  -save_checkpoint_steps 5000 \
  -valid_steps 1000 \
  -save_model "$output_dir/final-model"
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Data configuration for the "repetition" / "large" split.
# Paths are relative, so they resolve against the directory the OpenNMT-py
# tool is launched from (presumably the repository root - confirm).
data:
    # Training corpus (source / target sides).
    corpus:
        path_src: data/repetition/split/large/src-train.txt
        path_tgt: data/repetition/split/large/tgt-train.txt
    # Validation set.
    valid:
        path_src: data/repetition/split/large/src-val.txt
        path_tgt: data/repetition/split/large/tgt-val.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Data configuration for the "repetition" / "small" split.
# Paths are relative, so they resolve against the directory the OpenNMT-py
# tool is launched from (presumably the repository root - confirm).
data:
    # Training corpus (source / target sides).
    corpus:
        path_src: data/repetition/split/small/src-train.txt
        path_tgt: data/repetition/split/small/tgt-train.txt
    # Validation set.
    valid:
        path_src: data/repetition/split/small/src-val.txt
        path_tgt: data/repetition/split/small/tgt-val.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,8 @@ | ||
#!/bin/bash
#
# Translate the test set with the simple-lstm model's step-20000 checkpoint
# using the onmt_translate entry point (beam size 5, single best hypothesis).
# Predictions are written to <output_dir>/pred-test_beam5.txt.
#
# The superseded invocation through a local OpenNMT-py checkout
# (python $OpenNMT_py/translate.py ... -dynamic_dict, with ../../ paths) is
# dropped so that only one translator runs.
#
# Usage: <this script> [size] [type]
#   size  small (default) OR large
#   type  unique (default) OR repetition
#
# All paths start with ./ and are therefore resolved against the current
# working directory.

set -euo pipefail

size=${1:-small}  # Can be: small OR large
type=${2:-unique} # Can be: repetition OR unique
data_path=./data/$type/split/$size
output_dir=./saved_models/simple-lstm/$type/$size

onmt_translate \
  -model "$output_dir/final-model_step_20000.pt" \
  -src "$data_path/src-test.txt" \
  -beam_size 5 \
  -n_best 1 \
  -output "$output_dir/pred-test_beam5.txt"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,8 @@ | ||
#!/bin/bash
#
# Train the simple-lstm model with the onmt_train entry point on four GPUs,
# using the YAML data config and the shared vocabulary at
# <output_dir>/final.vocab. Checkpoints are written with the prefix
# <output_dir>/final-model.
#
# The superseded invocation through a local OpenNMT-py checkout
# (python $OpenNMT_py/train.py -data ..., with ../../ paths) is dropped so
# that only one training run is started.
#
# Usage: <this script> [size] [type]
#   size  small (default) OR large
#   type  unique (default) OR repetition
#
# All paths start with ./ and are therefore resolved against the current
# working directory.

set -euo pipefail

size=${1:-small}  # Can be: small OR large
type=${2:-unique} # Can be: repetition OR unique
data_config=./scripts/simple-lstm/${type}_${size}_data.yaml
output_dir=./saved_models/simple-lstm/$type/$size

# Expose the four devices that -world_size / -gpu_ranks below refer to.
export CUDA_VISIBLE_DEVICES=0,1,2,3

onmt_train \
  -config "$data_config" \
  -share_vocab \
  -src_vocab "$output_dir/final.vocab" \
  -world_size 4 \
  -gpu_ranks 0 1 2 3 \
  -encoder_type brnn \
  -enc_layers 2 \
  -decoder_type rnn \
  -dec_layers 2 \
  -rnn_size 256 \
  -global_attention general \
  -batch_size 32 \
  -word_vec_size 256 \
  -bridge \
  -copy_attn \
  -reuse_copy_attn \
  -train_steps 20000 \
  -save_checkpoint_steps 5000 \
  -valid_steps 1000 \
  -save_model "$output_dir/final-model"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Data configuration for the "unique" / "large" split.
# Paths are relative, so they resolve against the directory the OpenNMT-py
# tool is launched from (presumably the repository root - confirm).
data:
    # Training corpus (source / target sides).
    corpus:
        path_src: data/unique/split/large/src-train.txt
        path_tgt: data/unique/split/large/tgt-train.txt
    # Validation set.
    valid:
        path_src: data/unique/split/large/src-val.txt
        path_tgt: data/unique/split/large/tgt-val.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Data configuration for the "unique" / "small" split.
# Paths are relative, so they resolve against the directory the OpenNMT-py
# tool is launched from (presumably the repository root - confirm).
data:
    # Training corpus (source / target sides).
    corpus:
        path_src: data/unique/split/small/src-train.txt
        path_tgt: data/unique/split/small/tgt-train.txt
    # Validation set.
    valid:
        path_src: data/unique/split/small/src-val.txt
        path_tgt: data/unique/split/small/tgt-val.txt