-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfg11.py
61 lines (54 loc) · 1.43 KB
/
fg11.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# print('X---XX')
import itertools
from collections import Counter

import numpy as np

import save
import utils
def generate(X, seqType, args):
    '''
    Build gapped element-pair count features for every sequence and save them.

    Each sequence in X is first sliced with ``x[:args.terminusLength]``. Then,
    for every gap size ``i`` from 1 to ``args.gGap`` (inclusive), the k-mers of
    length ``i + 2`` are requested from ``utils.kmers`` and, for every ordered
    pair of sequence elements, the number of k-mers whose first item equals the
    pair's first element and whose last item equals the pair's second element
    is appended to that sequence's feature row. Columns are therefore ordered
    by gap size first, then by ``itertools.product(elements, repeat=2)`` order.

    # Note-1: args.gGap --> 1, 2, 3
    # Note-2: each pair --> ('X', 'X')

    :param X: iterable of sliceable sequences (presumably strings -- confirm
        against the caller).
    :param seqType: sequence type passed to ``utils.sequenceElements``; the
        values 'DNA', 'RNA' and 'PROT' also select the reported feature count.
    :param args: object providing ``gGap`` (largest gap size, an int) and
        ``terminusLength`` (slice bound applied to every sequence).
    :return: None. The feature matrix is handed to
        ``save.datasetSave(T, totalFeature, 'fg11')``.
    '''
    elements = utils.sequenceElements(seqType)
    pairs = list(itertools.product(elements, repeat=2))

    T = []
    for x in X:
        x = x[:args.terminusLength]
        t = []
        for i in range(1, args.gGap + 1):
            # Tally the (first, last) items of every k-mer in a single pass
            # instead of rescanning the whole k-mer list once per pair.
            # NOTE(review): assumes k-mer items are hashable (e.g. characters
            # of a string) -- confirm against utils.kmers.
            ends = Counter((v[0], v[-1]) for v in utils.kmers(x, i + 2))
            # A Counter yields 0 for pairs that never occur, so every pair
            # still contributes exactly one column per gap size.
            t.extend(ends[pair] for pair in pairs)
        T.append(np.array(t))
    T = np.array(T)

    # Feature count reported to the saver: (alphabet size)^2 per gap size.
    # Unrecognised sequence types keep the original fallback of 0.
    if seqType in ('DNA', 'RNA'):
        totalFeature = 4 * args.gGap * 4
    elif seqType == 'PROT':
        totalFeature = 20 * args.gGap * 20
    else:
        totalFeature = 0

    save.datasetSave(T, totalFeature, 'fg11')
#end-def