-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUtils.py
38 lines (35 loc) · 1.41 KB
/
Utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import sys, argparse, sh, random
def Indexing(Input, SampleList, Contaminant):
    """Locate the requested individuals in the ``<Input>.ind`` file.

    The file is read directly instead of being piped through external
    ``cat -n`` / ``grep`` processes, so individual names are matched
    literally (no regex or option interpretation) and the result does not
    depend on how the ``sh`` package returns command output.

    Args:
        Input: Path prefix of the dataset; ``Input + ".ind"`` is opened.
        SampleList: Names of the individuals to look up.
        Contaminant: Name of one additional individual to look up. An empty
            string means "none" and is never reported as missing.

    Returns:
        A tuple ``(Index, Sex, Pop)`` of dicts keyed by individual name:
        ``Index`` holds the 0-based line number of the individual in the
        .ind file, ``Sex`` the second whitespace-separated column and
        ``Pop`` the third. If a name occurs on several lines, the last
        occurrence wins.

    Raises:
        ValueError: If a requested individual (other than ``''``) is not
            present in the .ind file, or if its line has fewer than three
            columns.
    """
    Index = {}
    Sex = {}
    Pop = {}
    # list() so that tuples and other iterables are accepted as SampleList.
    Requested = list(SampleList) + [Contaminant]
    Wanted = set(Requested)
    with open(Input + ".ind", "r") as IndFile:
        # enumerate() is 0-based, matching the original "cat -n number - 1".
        for LineIdx, Line in enumerate(IndFile):
            Fields = Line.split()
            if not Fields or Fields[0] not in Wanted:
                continue
            if len(Fields) < 3:
                raise ValueError(
                    "Line {} of '{}.ind' has fewer than three columns.".format(
                        LineIdx + 1, Input))
            Name = Fields[0]
            Index[Name] = LineIdx
            Sex[Name] = Fields[1]
            Pop[Name] = Fields[2]
    for Ind in Requested:
        # An empty name (e.g. no contaminant given) is allowed to be absent.
        if Ind not in Index and Ind != '':
            raise ValueError("Individual '{}' could not be indexed. Execution halted.".format(Ind))
    return (Index, Sex, Pop)
def _CountLines(Path):
    """Return the number of lines in the file at Path.

    A final line without a trailing newline is counted, unlike ``wc -l``,
    which counts newline characters only.
    """
    with open(Path, "rb") as Handle:
        return sum(1 for _ in Handle)


def CheckInputFiles(Input):
    """Check that the .geno, .snp and .ind files of a dataset are consistent.

    Two checks are made, in this order:

    1. ``Input + ".geno"`` and ``Input + ".snp"`` have the same number of
       lines.
    2. The first line of the .geno file, stripped of surrounding whitespace,
       has as many characters as the .ind file has lines. Only the first
       .geno line is inspected; an empty .geno file skips this check.

    Lines are counted in-process rather than via external ``wc`` / ``grep``
    calls, so a missing trailing newline no longer causes a false mismatch
    and the path prefix is never interpreted as a regular expression.

    Args:
        Input: Path prefix shared by the three files.

    Raises:
        IOError: If either consistency check fails.
    """
    ## Check geno and snp compatibility
    if _CountLines(Input + ".geno") != _CountLines(Input + ".snp"):
        raise IOError("Input .snp and .geno files do not match.")
    ## Check geno and ind compatibility
    IndCount = _CountLines(Input + ".ind")
    with open(Input + ".geno", "r") as GenoFile:
        FirstLine = GenoFile.readline()
    # readline() returns '' only for an empty file, which is left unchecked.
    if FirstLine and len(FirstLine.strip()) != IndCount:
        raise IOError("Input .ind and .geno files do not match.")