-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathorthomclReduceGroups
55 lines (41 loc) · 1.19 KB
/
orthomclReduceGroups
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/perl
use strict;
# Main program: reduce a groups file to the members of the requested taxa.
#
# Command-line arguments:
#   1. groups file; a name ending in ".gz" is decompressed through zcat
#   2. comma-delimited list of taxa (at least two entries are required)
#
# Each input line is read as "group_id member member ...".  The text before
# the first '|' of a member is taken as its taxon.  A group is written to
# STDOUT, with only the members whose taxon was requested, when at least one
# member survives; groups with no surviving member are dropped.
usage() unless @ARGV == 2;
my ($groupsfile, $taxa_str) = @ARGV;

# Validate the taxa first so a bad list never opens the file or spawns zcat.
my @t = split(/,/, $taxa_str);
scalar(@t) > 1 || die "Invalid taxa specification '$taxa_str'\n";
my %wanted_taxon = map { $_ => 1 } @t;

my $in;
if ($groupsfile =~ /\.gz$/) {
    # List-form pipe open: the file name reaches zcat as one argument and is
    # never interpreted by a shell.
    open($in, '-|', 'zcat', $groupsfile)
        or die "Can't open groups file '$groupsfile': $!\n";
}
else {
    # Three-argument open: mode characters in the name are not interpreted.
    open($in, '<', $groupsfile)
        or die "Can't open groups file '$groupsfile': $!\n";
}

while (my $line = <$in>) {
    chomp $line;

    # split ' ' collapses runs of whitespace, so no empty members are produced.
    my ($group_id, @members) = split(' ', $line);
    next unless defined $group_id;    # blank line

    my @kept = grep {
        my ($taxon) = split(/\|/, $_, 2);
        defined $taxon && $wanted_taxon{$taxon};
    } @members;
    next unless @kept;

    print join(' ', $group_id, @kept), "\n";
}

# For the zcat pipe, close() is where a non-zero exit status becomes visible;
# without this check a failed decompression would pass as truncated output.
close($in)
    or die $!
        ? "Error closing groups file '$groupsfile': $!\n"
        : "zcat failed on groups file '$groupsfile' (wait status $?)\n";
# Write the help text to STDERR and terminate the program with exit status 1.
sub usage {
    # Non-interpolating heredoc; the text starts with an empty line.
    my $help_text = <<'END_USAGE';

Reduce a groups file by taxon. Input is a groups file and a set of taxa.
Output is a groups file that contains only those taxa
Usage:
orthomclReduceGroups groups_file taxa
where:
groups_file: a standard orthomcl groups file. (.gz file is ok)
taxa: a comma delimited list of taxon abbreviations
EXAMPLE: orthomclSoftware/bin/orthomclReduceGroups groups.txt hsa,pfa,txo
END_USAGE

    print STDERR $help_text;
    exit(1);
}