Skip to content

Commit

Permalink
reverse learning
Browse files Browse the repository at this point in the history
  • Loading branch information
neurlang authored and Your Name committed Dec 28, 2024
1 parent 669ce4d commit d57432d
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 7 deletions.
13 changes: 11 additions & 2 deletions cmd/analysis/clean_language.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
#!/bin/bash
# Runs ./analysis over ../../dicts/<$1>/dirty.tsv and writes a cleaned TSV next to it.
#   $1      dictionary directory name under ../../dicts/
#   $2..$9  forwarded verbatim (unquoted) to the analysis binary
# If any argument equals --reverse, the "_reverse" suffix is inserted into the
# language JSON and destination TSV file names.

# Scan every argument for --reverse; the suffix stays empty when it is absent.
reverse_flag=""
for arg in "$@"; do
if [[ "$arg" == "--reverse" ]]; then
reverse_flag="_reverse"
break
fi
done

analysis_script="./analysis"

# NOTE(review): the next assignment is immediately overwritten by the one after it;
# it looks like the pre-change line of a rendered diff — confirm before relying on it.
original_json="../../dicts/$1/language.json"
original_json="../../dicts/$1/language$reverse_flag.json"
srcfile="../../dicts/$1/dirty.tsv"
# NOTE(review): same pattern — this dstfile value is replaced by the following line.
dstfile="../../dicts/$1/clean.tsv"
dstfile="../../dicts/$1/clean$reverse_flag.tsv"
# Only positional arguments 2 through 9 are forwarded, so a --reverse given as the
# tenth or later argument still selects the "_reverse" file names above but is not
# passed on to the analysis binary.
$analysis_script --target 9999999999999 --lang "$original_json" --srcfile "$srcfile" --dstfile "$dstfile" -loss -nospaced -noipadash $2 $3 $4 $5 $6 $7 $8 $9

2 changes: 2 additions & 0 deletions cmd/analysis/clean_language_reverse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Convenience wrapper: runs clean_language.sh with the caller's arguments
# followed by --reverse.
# "$@" forwards each argument as a single word (no word splitting or glob
# expansion) and does not cap the argument count at nine.
./clean_language.sh "$@" --reverse
11 changes: 10 additions & 1 deletion cmd/analysis/creator.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
#!/bin/bash

# Initialize a reverse flag
reverse_flag=""
for arg in "$@"; do
if [[ "$arg" == "--reverse" ]]; then
reverse_flag="_reverse"
break
fi
done

random=$(shuf -i 1-100000 -n 1)
original_json="../../dicts/$2/language.json"
original_json="../../dicts/$2/language$reverse_flag.json"
mutated_json="/tmp/language_mutated.$random.json"
analysis_script="./analysis"
srcfile="../../dicts/$2/dirty.tsv"
Expand Down
5 changes: 5 additions & 0 deletions cmd/analysis/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ func main() {
nospaced := flag.Bool("nospaced", false, "delete spacing")
padspace := flag.Bool("padspace", false, "insert space to the end of target word in case of a spaceless written language")
matrices := flag.Bool("matrices", false, "show edit matrices")
reverse := flag.Bool("reverse", false, "reverse translation (swap source and target languages)")
escapeunicode := flag.Bool("escapeunicode", false, "escape unicode when viewing")
normalize := flag.String("normalize", "", "normalize unicode, for instance to NFC")
deleteval := flag.Bool("deleteval", false, "delete one value")
Expand Down Expand Up @@ -567,6 +568,10 @@ func main() {
var threeways = make(map[string]uint64)

loop(*srcFile, 200, func(word1, word2 string) {

if reverse != nil && *reverse {
word1, word2 = word2, word1
}

if randsubs != nil && *randsubs != 0 {
if rand.Intn(1+*randsubs) != 0 {
Expand Down
13 changes: 11 additions & 2 deletions cmd/analysis/mutator.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
#!/bin/bash

# Initialize a reverse flag
reverse_flag=""
for arg in "$@"; do
if [[ "$arg" == "--reverse" ]]; then
reverse_flag="_reverse"
break
fi
done

# Paths to the files
mutations_file="../../dicts/$1/$2.language.json"
original_json="../../dicts/$1/language.json"
mutations_file="../../dicts/$1/$2.language$reverse_flag.json"
original_json="../../dicts/$1/language$reverse_flag.json"
mutated_json="/tmp/language_mutated.json"
analysis_script="./analysis"
srcfile="../../dicts/$1/dirty.tsv"
Expand Down
11 changes: 10 additions & 1 deletion cmd/analysis/remover.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
#!/bin/bash

# Initialize a reverse flag
reverse_flag=""
for arg in "$@"; do
if [[ "$arg" == "--reverse" ]]; then
reverse_flag="_reverse"
break
fi
done

# Paths to the files
random=$(shuf -i 1-100000 -n 1)
original_json="../../dicts/$2/language.json"
original_json="../../dicts/$2/language$reverse_flag.json"
mutated_json="/tmp/language_mutated.$random.json"
analysis_script="./analysis"
srcfile="../../dicts/$2/dirty.tsv"
Expand Down
2 changes: 2 additions & 0 deletions cmd/analysis/study_language_reverse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Convenience wrapper: runs study_language.sh with the caller's arguments
# followed by --reverse.
# "$@" forwards each argument as a single word (no word splitting or glob
# expansion) and does not cap the argument count at nine.
./study_language.sh "$@" --reverse
2 changes: 1 addition & 1 deletion cmd/analysis/train_language.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

#train

../../../classifier/cmd/train_phonemizer/train_phonemizer --cleantsv ../../dicts/$1/clean.tsv --dstmodel ../../dicts/$1/weights1.json.lzw $2
../../../classifier/cmd/train_phonemizer/train_phonemizer --cleantsv ../../dicts/$1/clean.tsv --dstmodel ../../dicts/$1/weights1.json.lzw $2 $3 $4 $5 $6
5 changes: 5 additions & 0 deletions cmd/analysis/train_language_reverse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

# Invokes train_phonemizer with the "_reverse" file variants of dictionary $1:
#   --cleantsv  ../../dicts/$1/clean_reverse.tsv
#   --dstmodel  ../../dicts/$1/weights1_reverse.json.lzw
# Every argument after $1 is forwarded unchanged to train_phonemizer.
# The paths are quoted so a dictionary name containing spaces or glob
# characters stays a single word; "${@:2}" expands to nothing when no extra
# arguments were given.

../../../classifier/cmd/train_phonemizer/train_phonemizer --cleantsv "../../dicts/$1/clean_reverse.tsv" --dstmodel "../../dicts/$1/weights1_reverse.json.lzw" "${@:2}"

0 comments on commit d57432d

Please sign in to comment.