forked from rcedgar/syncmer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastaseqsource.cpp
115 lines (105 loc) · 2.12 KB
/
fastaseqsource.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#include "myutils.h"
#include "fastaseqsource.h"
#include "seqinfo.h"
#include "alpha.h"
#include "omplock.h"
// Forward declaration only; the definition is not in this file.
// Used by FASTASeqSource::GetIsNucleo(), which passes it the open FILE handle.
bool FastaFileIsNucleo(FILE *f);
// Default constructor; the body performs no work.
FASTASeqSource::FASTASeqSource() {}
// Destructor; the body performs no work.
FASTASeqSource::~FASTASeqSource() {}
// Returns the result of FastaFileIsNucleo() applied to the FILE handle
// held by the line reader (m_LR.m_f). Asserts that the handle is non-null.
bool FASTASeqSource::GetIsNucleo()
{
	FILE *const f = m_LR.m_f;
	asserta(f != 0);
	const bool IsNucleo = FastaFileIsNucleo(f);
	return IsNucleo;
}
// Reads the next non-empty FASTA record into *SI.
//
// Caller must own the SeqInfo memory because a SeqSource may be shared
// between threads, so SeqInfo should be thread-private.
//
// On entry m_LineBuff is expected to hold the '>' header line of the next
// record (left there by the previous call, which stops reading when it sees
// the following header). At the start of the file the first line is read
// here instead.
//
// Returns true when a record with at least one residue has been stored:
// label in SI->m_LabelBuffer (without the leading '>'), residues in
// SI->m_SeqBuffer, length in SI->m_L. Returns false when no further record
// can be read. Records with no sequence data are reported via Warning() and
// skipped. Calls Die() if the line where a header is expected does not start
// with '>'.
bool FASTASeqSource::GetNextLo(SeqInfo *SI)
{
	if (m_LR.m_EOF)
		return false;

	const bool TruncLabels = opt(trunclabels);

	// Outer loop exists only so that empty sequences can be skipped.
	for (;;)
	{
		// Special case at start of file: no line has been read yet.
		if (m_LR.m_LineNr == 0)
		{
			if (!ReadLine())
				return false;
		}

		SI->Init(m_SeqCount);
		SI->m_Qual = 0;

		// Skip blank lines in front of the header. The pointer and size must
		// be re-fetched after every ReadLine(); testing the values captured
		// before the read would always see the old (empty) line.
		const char *Header = m_LineBuff.Data;
		unsigned HeaderLen = m_LineBuff.Size;
		while (HeaderLen == 0)
		{
			if (!ReadLine())
				return false;
			Header = m_LineBuff.Data;
			HeaderLen = m_LineBuff.Size;
		}

		if (Header[0] != '>')
			Die("Bad FASTA file %s, expected '>' in line %u",
			  GetFileNameC(), m_LR.m_LineNr);

		// Copy the header minus its leading '>' into the label buffer.
		// HeaderLen bytes hold the HeaderLen-1 label characters plus the
		// terminating NUL.
		SI->AllocLabel(HeaderLen);
		char *Label = SI->m_LabelBuffer;
		for (unsigned i = 1; i < HeaderLen; ++i)
		{
			const byte c = (byte) Header[i];
			if (TruncLabels && isspace(c))
			{
				// With label truncation enabled, cut at the first whitespace.
				Label[i-1] = 0;
				break;
			}
			Label[i-1] = c;
		}
		Label[HeaderLen-1] = 0;

		// Accumulate sequence lines until the next header or end of input.
		unsigned SeqLength = 0;
		bool InputExhausted = false;
		for (;;)
		{
			if (!ReadLine())
			{
				InputExhausted = true;
				break;
			}

			const char *SeqLine = m_LineBuff.Data;
			const unsigned SeqLineLen = m_LineBuff.Size;
			if (SeqLineLen > 0 && SeqLine[0] == '>')
				break;	// next record's header; leave it in m_LineBuff

			// m_L is updated before AllocSeq() -- presumably so that a
			// reallocation keeps the residues stored so far; confirm against
			// SeqInfo::AllocSeq.
			SI->m_L = SeqLength;
			SI->AllocSeq(SeqLength + SeqLineLen);
			byte *Seq = SI->m_SeqBuffer;
			for (unsigned i = 0; i < SeqLineLen; ++i)
			{
				const byte c = (byte) SeqLine[i];
				if (isspace(c))
					continue;
				if (c == '-' || c == '.')
				{
					// Gap characters are kept unless stripping is requested.
					if (m_StripGaps)
						continue;
				}
				else if (!isalpha(c))
				{
					// Non-letter, non-gap bytes are reported and dropped.
					BadByte(c);
					continue;
				}
				Seq[SeqLength++] = c;
			}
		}

		SI->m_L = SeqLength;
		if (SeqLength > 0)
			return true;

		Warning("Empty sequence at line %u in FASTA file %s, label >%s",
		  GetLineNr(), GetFileNameC(), SI->m_Label);

		// If the read failed there is no next header waiting in m_LineBuff,
		// so do not loop back and re-parse whatever the buffer still holds.
		if (InputExhausted)
			return false;
	}
}