-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaligns.py
executable file
·48 lines (37 loc) · 1.43 KB
/
aligns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import numpy as np
class Align(object):
    """Word-level alignment for one utterance, plus its encoded label.

    An alignment is a list of ``(start, end, word)`` tuples.  Building an
    instance (via :meth:`from_file` or :meth:`from_array`) populates:

    * ``align`` -- the alignment list exactly as supplied,
    * ``sentence`` -- the selected words joined by single spaces,
    * ``label`` -- ``label_func(sentence)``,
    * ``padded_label`` -- ``label`` right-padded with ``-1`` up to
      ``absolute_max_string_len`` entries.
    """

    def __init__(self, absolute_max_string_len=128, label_func=None):
        """Store the padding length and the sentence-to-label encoder.

        Args:
            absolute_max_string_len: Length of ``padded_label``.
            label_func: Callable mapping a sentence string to a sequence of
                label values.  It must be set before an alignment is built.
        """
        self.label_func = label_func
        self.absolute_max_string_len = absolute_max_string_len

    def from_file(self, path):
        """Load an alignment from a text file and build this instance.

        The first four lines of the file are skipped.  Every remaining
        non-blank line is read as whitespace-separated fields
        ``word start end``; ``start`` and ``end`` are parsed as floats and
        multiplied by 25.
        NOTE(review): the factor 25 presumably converts seconds to frame
        indices of 25 fps video -- confirm against the data set.

        Args:
            path: Path of the alignment file.

        Returns:
            This instance, to allow call chaining.
        """
        with open(path, 'r') as f:
            lines = f.readlines()
        align = []
        for line in lines[4:]:
            # split() with no argument tolerates tabs and repeated spaces
            # and returns [] for blank lines, which are skipped instead of
            # raising IndexError.
            fields = line.split()
            if not fields:
                continue
            align.append(
                (float(fields[1]) * 25, float(fields[2]) * 25, fields[0])
            )
        self.build(align)
        return self

    def from_array(self, align):
        """Build this instance from an in-memory alignment list.

        Args:
            align: List of ``(start, end, word)`` tuples.

        Returns:
            This instance, to allow call chaining.
        """
        self.build(align)
        return self

    def build(self, align):
        """Derive ``sentence``, ``label`` and ``padded_label`` from *align*."""
        self.align = align
        self.sentence = self.get_sentence(align)
        self.label = self.get_label(self.sentence)
        self.padded_label = self.get_padded_label(self.label)

    def strip(self, align, items):
        """Return the entries of *align* whose word (index 2) is not in *items*."""
        return [sub for sub in align if sub[2] not in items]

    def get_sentence(self, align):
        """Join the words of the entries whose start satisfies ``start / 25 < 4.8``.

        NOTE(review): 4.8 is presumably a cut-off in seconds once the
        scaling applied by ``from_file`` is undone -- confirm.
        """
        return " ".join([y[-1] for y in align if y[0] / 25 < 4.8])

    def get_label(self, sentence):
        """Encode *sentence* with ``label_func``.

        Raises:
            TypeError: If no ``label_func`` was supplied.
        """
        if self.label_func is None:
            # Same exception type as calling None, but with a usable message.
            raise TypeError(
                "Align needs a callable label_func to encode a sentence; "
                "got None"
            )
        return self.label_func(sentence)

    def get_padded_label(self, label):
        """Return *label* as an array right-padded with ``-1``.

        The result has ``absolute_max_string_len`` entries.

        Raises:
            ValueError: If *label* is longer than ``absolute_max_string_len``.
        """
        pad_len = self.absolute_max_string_len - len(label)
        if pad_len < 0:
            raise ValueError(
                "label length {} exceeds absolute_max_string_len {}".format(
                    len(label), self.absolute_max_string_len
                )
            )
        padding = np.full(pad_len, -1.0)
        return np.concatenate((np.array(label), padding), axis=0)

    @property
    def word_length(self):
        """Number of pieces obtained by splitting ``sentence`` on single spaces.

        An empty sentence yields 1, because ``"".split(" ")`` is ``[""]``.
        """
        return len(self.sentence.split(" "))

    @property
    def sentence_length(self):
        """Number of characters in ``sentence``."""
        return len(self.sentence)

    @property
    def label_length(self):
        """Number of entries in the unpadded ``label``."""
        return len(self.label)