-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathloci2phylip.py
92 lines (65 loc) · 2.54 KB
/
loci2phylip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3
import argparse
import os
import errno
import sys
# Uses argparse library to parse command-line arguments; argparse must be imported
def Get_Arguments():
    """Parse this script's command-line options.

    Returns:
        argparse.Namespace: namespace whose ``loci`` attribute holds the
        required .loci input filename given with ``-L`` / ``--loci``.
    """
    cli = argparse.ArgumentParser(
        description="each locus in a .loci file from pyRAD is output to a separate Phylip file"
    )
    cli.add_argument(
        "-L",
        "--loci",
        type=str,
        required=True,
        help=".loci input filename",
    )
    return cli.parse_args()
def check_if_exists(filename):
    """Exit with an error message if *filename* cannot be opened for reading.

    Args:
        filename: path of the file to probe.

    Returns:
        None when the file can be opened.

    Raises:
        SystemExit: with status 1 when opening the file raises IOError.
            Every IOError (not only a missing file) is reported with the
            same "does not exist" message.
    """
    try:
        # The handle is only a readability probe; the with-block closes it
        # immediately instead of leaking it for the rest of the run.
        with open(filename, "r"):
            pass
    except IOError:
        print("\nError: The file " + filename + " does not exist.\n")
        sys.exit(1)
def locusGenerator(file, locus_count):
    """Yield one alignment per locus from an iterable of .loci lines.

    A line whose first non-blank character is a letter, a digit or ">" is
    read as a "<sample> <sequence>" record and added to the current locus.
    A line starting with "//" closes the current locus and triggers a yield.
    Records that are not followed by a "//" line are never yielded.

    Args:
        file: iterable of text lines, e.g. an open file handle.
        locus_count: number of loci counted so far; it is incremented
            before each yield.

    Yields:
        tuple: ``(locus, locus_count, ind_count, seq_len)`` where ``locus``
        maps sample name to sequence, ``ind_count`` is the number of records
        read for the locus and ``seq_len`` is the length of the last
        sequence read for it (0 if the locus had no records).
    """
    locus = dict()
    ind_count = 0
    # Start at 0 so a separator seen before any record cannot hit an
    # unbound name.
    seq_len = 0
    # Read from the argument rather than a global file handle.
    for line in file:
        line = line.strip()
        if line.startswith("//"):
            locus_count += 1
            yield locus, locus_count, ind_count, seq_len
            # Fresh state for the next locus; the yielded dict is not reused.
            locus = dict()
            ind_count = 0
            seq_len = 0
        elif line and (line[0].isalpha() or line[0].isdigit() or line[0] == ">"):
            fields = line.split()
            locus[fields[0]] = fields[1]
            ind_count += 1
            seq_len = len(fields[1])
def writePhylip(alignment, fout, indcount, seqlen):
    """Write one alignment to *fout* as a Phylip-style block.

    The first line is "<indcount> <seqlen>". Each following line holds a
    sample name (a leading ">" is dropped) left-justified to 15 characters,
    a tab, and the sequence.

    Args:
        alignment: mapping of sample name to sequence.
        fout: writable text stream.
        indcount: number of samples, written in the header.
        seqlen: sequence length, written in the header.
    """
    fout.write("{} {}\n".format(indcount, seqlen))
    for sample_id, sequence in alignment.items():
        label = sample_id[1:] if sample_id.startswith(">") else sample_id
        fout.write("{}\t{}\n".format(label.ljust(15), sequence))
# Makes subdirectory for outfiles
def makeLociDir(directory):
    """Create *directory* (and any missing parents), tolerating EEXIST.

    Args:
        directory: path of the directory to create.

    Raises:
        OSError: re-raised when creation fails with any errno other than
            EEXIST.
    """
    try:
        os.makedirs(directory)
    except OSError as err:
        # An already-existing path is not an error for this script.
        if err.errno == errno.EEXIST:
            return
        raise
##########################################################################################################################################
##############################################################MAIN########################################################################
arguments = Get_Arguments()
check_if_exists(arguments.loci)

locus_num = 0
dir = "loci"

# Create the "loci" output subdirectory
makeLociDir(dir)

with open(arguments.loci, "r") as fin:
    # The generator hands back a fresh {sampleID: sequence} dictionary for
    # every locus, together with the running locus number, the sample count
    # and the sequence length.
    for alignment, number, n_samples, n_sites in locusGenerator(fin, locus_num):
        # One Phylip file per locus: loci/locus<N>.phy
        out_path = os.path.join(dir, "locus{}.phy".format(number))
        with open(out_path, "w") as fout:
            writePhylip(alignment, fout, n_samples, n_sites)