-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpg12.py
68 lines (62 loc) · 1.76 KB
/
pg12.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import itertools
import utils
import save
def generate(X, seqType, args):
    '''
    Build position-specific gapped 3-mer ("pg12") one-hot features and save them.

    Every sequence is cut to its first ``args.terminusLength`` characters.
    For each gap ``g`` in ``1 .. args.gGap`` the windows of length ``3 + g``
    returned by ``utils.kmers`` are reduced to the triplet
    ``first + second-to-last + last`` and one-hot encoded against all ordered
    triplets of ``utils.sequenceElements(seqType)`` (``itertools.product``
    order).  Each gap block is padded with all-zero rows up to
    ``args.terminusLength - 3`` rows, the gap blocks are joined column-wise,
    and the stacked result is handed to ``save.datasetSave`` under the name
    ``'pg12'``.

    :param X: iterable of sequences; each must support slicing and ``len``
              (presumably strings -- confirm against the caller)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a total
                    feature count of 0, as before
    :param args: object exposing ``terminusLength`` and ``gGap``
    :return: None, the features are passed to ``save.datasetSave``
    :raises ValueError: if a window yields a triplet that is not built from
                        the alphabet of ``seqType`` (previously this silently
                        produced a row with an extra column)
    '''
    if seqType in ('DNA', 'RNA'):
        alphabet_size = 4
    elif seqType == 'PROT':
        alphabet_size = 20
    else:
        alphabet_size = None

    elements = utils.sequenceElements(seqType)

    # Triplet -> column lookup, built once instead of once per k-mer.
    # setdefault keeps the first position of a repeated triplet, which is the
    # column a dict keyed by the same triplets would assign.
    column_of = {}
    for combo in itertools.product(elements, repeat=3):
        column_of.setdefault(''.join(combo), len(column_of))
    width = len(column_of)

    # Row count every gap block must reach: the number of windows of length
    # 3 + 1 in a full-length terminus.
    target_rows = args.terminusLength - (3 + 1) + 1

    T = []
    for x in X:
        x = x[:args.terminusLength]
        merged = []
        for gap in range(1, args.gGap + 1):
            rows = []
            for kmer in utils.kmers(x, 3 + gap):  # g12 --> 3, gGap (g11+gGap)
                segment = kmer[0] + kmer[-2] + kmer[-1]
                column = column_of.get(segment)
                if column is None:
                    raise ValueError(
                        'pg12: triplet {!r} is not made of valid {} elements'
                        .format(segment, seqType)
                    )
                row = [0] * width
                row[column] = 1
                rows.append(row)

            # Pad from the rows actually produced, so sequences shorter than
            # the window cannot end up with too many padding rows.
            missing = target_rows - len(rows)
            if missing > 0:
                # Padding rows share the encoded rows' width by construction.
                rows.extend([0] * width for _ in range(missing))

            merged.append(np.array(rows))
        T.append(np.concatenate(merged, axis=1))

    T = np.array(T)

    totalFeature = alphabet_size ** 3 * args.gGap if alphabet_size else 0
    save.datasetSave(T, totalFeature, 'pg12')
#end-for