-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadjust_fasta_mothur.pl
executable file
·151 lines (138 loc) · 3.86 KB
/
adjust_fasta_mothur.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#! /usr/bin/perl
#
#
# ---------------------------------------------------------------------------
# Main program
#
# Reads a mothur oligos file, a FASTA file and its matching QUAL file, and
# writes <output>.fasta and <output>.qual in which every read is prefixed
# with a barcode:
#   * FASTA: the '#XXXX' tag found in the read header is looked up in the
#     table built from the oligos file; when it is known the oligos-file
#     barcode is prepended, otherwise a run of 'N' of length -len is used.
#   * QUAL:  -len copies of the placeholder quality (-fq) are prepended to
#     the first quality line of every record.
#
# Command-line flags (each one takes the argument that follows it):
#   -f FASTA  -q QUAL  -out PREFIX  -m OLIGOS      (required)
#   -len N (default 8)  -change y/n (default n)  -fq N (default 60)
#   -h / -help / --help print the usage text.
# ---------------------------------------------------------------------------
{
    # Strictures are confined to this block so that code outside it is
    # compiled exactly as it was before.
    use strict;
    use warnings;

    dieHelp() unless @ARGV;

    # Flag -> internal option name.  Unknown arguments are ignored.
    my %option_for_flag = (
        '-f'      => 'fasta',
        '-q'      => 'qual',
        '-out'    => 'output',
        '-len'    => 'barlen',
        '-change' => 'change',
        '-m'      => 'oligos',
        '-fq'     => 'fq',
    );

    my %opt;
    for my $idx (0 .. $#ARGV) {
        my $arg = $ARGV[$idx];
        dieHelp() if $arg eq '-h' || $arg eq '--help' || $arg eq '-help';
        next unless exists $option_for_flag{$arg};
        $opt{ $option_for_flag{$arg} } = $ARGV[ $idx + 1 ];
    }

    # Drop a stray trailing newline from any value that was supplied.
    for my $value (values %opt) {
        chomp $value if defined $value;
    }

    dieHelp()
        unless $opt{output} && $opt{fasta} && $opt{qual} && $opt{oligos};

    my ($fasta_file, $qual_file, $oligo_file, $output)
        = @opt{qw(fasta qual oligos output)};

    my $barlen = $opt{barlen} || 8;
    my $change = $opt{change} || 'n';

    # A placeholder quality of 0 is a legitimate request, so test for
    # "was a value given" rather than for truth.
    my $fq = (defined $opt{fq} && length $opt{fq}) ? $opt{fq} : 60;

    # $barlen is interpolated into a regex quantifier and used as a repeat
    # count below, so it has to be a positive whole number.
    die "-len must be a positive whole number (got '$barlen')\n"
        unless $barlen =~ /\A\d+\z/ && $barlen > 0;

    # -----------------------------------------------------------------------
    # Build the lookup table: barcode as it appears in the read header
    # -> barcode as written in the oligos file.
    # -----------------------------------------------------------------------
    my %barcode_for;
    open my $oligo_fh, '<', $oligo_file or die "Can't open $oligo_file\n";
    while (my $line = <$oligo_fh>) {
        next unless $line =~ /^barcode/;

        # Remove the line ending first; otherwise a two-column line leaves
        # it attached to the barcode.
        $line =~ s/\r?\n\z//;

        my (undef, $barcode) = split /\t/, $line;
        next unless defined $barcode && length $barcode;

        if ($change =~ /[Yy]/) {
            # revcomp() may read the package variable $rc_seq instead of
            # its argument, so keep the two in sync.
            our $rc_seq = $barcode;
            my $observed = revcomp($rc_seq);
            $barcode_for{$observed} = $barcode;
        }
        else {
            $barcode_for{$barcode} = $barcode;
        }
    }
    close $oligo_fh;

    # Placeholder barcode and its quality string, used when a read's tag is
    # not in the table (FASTA) and for every record (QUAL).
    my $fakestring = 'N' x $barlen;
    my $fakequal   = join ' ', ($fq) x $barlen;

    # Read one '>'-delimited record at a time from here on; 'local' keeps
    # the change from leaking out of this block.
    local $/ = '>';

    # -----------------------------------------------------------------------
    # FASTA pass
    # -----------------------------------------------------------------------
    open my $fasta_in, '<', $fasta_file or die "Can't open $fasta_file\n";
    open my $fasta_out, '>', "${output}.fasta"
        or die "Can't open ${output}.fasta\n";
    while (my $record = <$fasta_in>) {
        chomp $record;                      # strips the trailing '>'
        next unless $record =~ /\S/;        # text before the first '>'

        my ($header, @seq_lines) = split /\r?\n/, $record;
        my $sequence = join '', @seq_lines;

        # Letters only: [A-Za-z], not [A-z], which also admits [ \ ] ^ _ `
        my ($tag) = $header =~ /\#([A-Za-z]{$barlen})/;
        my $prefix = (defined $tag && $barcode_for{$tag})
                   ? $barcode_for{$tag}
                   : $fakestring;

        print {$fasta_out} ">$header\n${prefix}${sequence}\n";
    }
    close $fasta_in;
    close $fasta_out or die "Can't close ${output}.fasta: $!\n";

    # -----------------------------------------------------------------------
    # QUAL pass
    # -----------------------------------------------------------------------
    open my $qual_in, '<', $qual_file or die "Can't open $qual_file\n";
    open my $qual_out, '>', "${output}.qual"
        or die "Can't open ${output}.qual\n";
    while (my $record = <$qual_in>) {
        chomp $record;
        next unless $record =~ /\S/;

        my ($header, @quality_lines) = split /\r?\n/, $record;

        # The placeholder qualities go in front of the first quality line.
        print {$qual_out} ">$header\n$fakequal ";
        print {$qual_out} "$_\n" for @quality_lines;

        # A record without quality lines must still end its line, or the
        # next header would be glued onto this one.
        print {$qual_out} "\n" unless @quality_lines;
    }
    close $qual_in;
    close $qual_out or die "Can't close ${output}.qual: $!\n";
}
# revcomp($sequence)
#
# Return the reverse complement of a DNA string made of the upper-case
# letters A, C, G, T and N (N stays N).
#
# Parameters:
#   $sequence - the string to reverse-complement.  When the sub is called
#               with no argument at all it falls back to the package
#               variable $rc_seq, which is how it used to receive its input.
# Returns:
#   the reverse-complemented string ('' for an empty or undefined input).
# Dies:
#   with "What is this (reverse complement): <char> <sequence> <index>" when
#   the input contains any other character; the character reported is the
#   last offending one in the string, together with its 0-based position.
sub revcomp {
    our $rc_seq;    # legacy input channel, only consulted without arguments
    my $sequence = @_ ? $_[0] : $rc_seq;

    return '' unless defined $sequence && length $sequence;

    # Find the last character that is not a recognised base.
    if ($sequence =~ /([^ACGTN])[ACGTN]*\z/) {
        my ($bad_char, $position) = ($1, $-[1]);
        die "What is this (reverse complement): $bad_char $sequence $position\n";
    }

    my $complement = scalar reverse $sequence;
    $complement =~ tr/ACGTN/TGCAN/;
    return $complement;
}
# dieHelp()
#
# Abort the program with the usage text.  Takes no arguments and never
# returns: the message is raised with die, so it goes to STDERR and the
# process exits with a non-zero status unless the caller traps it.
#
# The text states the defaults the script actually applies (-len 8,
# -change n, -fq 60) and the output file names it actually writes
# (<prefix>.fasta and <prefix>.qual).
sub dieHelp {
    my $programname = "adjust_fasta_mothur.pl";
    die "Usage: perl $programname -f fasta.file -q qual.file -out output_prefix -m oligo_file -len 8 -change y
Order unimportant
Synopsis: Use this program to adjust a fasta and qual file from Illumina with the indexing sequence in the file name for use in mothur SOP
Options:
-f fasta_file A fasta file to be changed (required)
-q qual_file A qual file corresponding to the fasta file to be changed (required)
-out The output prefix which will be used to generate the .fasta and .qual files (required)
-m oligo_file The oligo file used by mothur for indexing (required)
-len number Number of bases in the indexing sequence in the header after # (regex expression #{number}) (optional default=8)
-change y/n Change the orientation of the barcode to reverse complement (optional default=n)
-fq number The fake quality score to be given to the indexing bases (optional default=60)
\n";
}