// personio.h (110 lines, 94 loc, 3.88 KB) -- forked from williamslab/genetio
// Library for I/O of genetic data
// Author: Amy Williams <alw289 cornell edu>
//
// This program is distributed under the terms of the GNU General Public License
#include <stdio.h>
#include <stdint.h>
#include <zlib.h>
#include "dynarray.h"
#include "hashtable.h"
#ifdef VCF
#include <htslib/hts.h>
#include <htslib/tbx.h>
#endif
#ifndef PERSONIO_H
#define PERSONIO_H
template <class P>
class PersonIO {
  public:
    // ---------------------------------------------------------------
    // Public static interface
    // ---------------------------------------------------------------

    // Input entry points.  Both overloads share the same leading
    // arguments (genotype/marker/individual file names, an optional
    // chromosome restriction, a start/end position range, the X
    // chromosome name, and the noFamilyId setting).
    //
    // This overload additionally takes the vcfInput flag, the
    // printTrioKids and phased flags, and the numMendelError /
    // numMendelCounted out-pointers.
    // NOTE(review): the exact meaning of each flag is defined by the
    // implementation, which is not visible in this header.
    static void readData(const char *genoFile,
                         const char *markerFile,
                         const char *indFile,
                         const char *onlyChr,
                         int startPos,
                         int endPos,
                         const char *XchrName,
                         int noFamilyId,
                         bool vcfInput,
                         bool printTrioKids = false,
                         FILE *log = NULL,
                         bool phased = false,
                         int **numMendelError = NULL,
                         int **numMendelCounted = NULL,
                         bool allowEmptyParents = false,
                         bool bulkData = false,
                         bool loopData = false,
                         bool useParents = true,
                         bool ignoreAlleles = false);

    // Overload in which log, allowEmptyParents and bulkData must be
    // passed explicitly; it has no vcfInput, printTrioKids, phased or
    // Mendel-error parameters.
    static void readData(const char *genoFile,
                         const char *markerFile,
                         const char *indFile,
                         const char *onlyChr,
                         int startPos,
                         int endPos,
                         const char *XchrName,
                         int noFamilyId,
                         FILE *log,
                         bool allowEmptyParents,
                         bool bulkData,
                         bool loopData = false,
                         bool useParents = true,
                         bool ignoreAlleles = false);

#ifdef VCF
    // VCF reader; only declared when the library is built with VCF
    // defined (which also pulls in the htslib headers).
    static void readVCF(const char *vcfFile,
                        const char *onlyChr,
                        int startPos,
                        int endPos,
                        const char *XcharName,
                        FILE *log = NULL);
#endif

    // Row-at-a-time genotype access.  readGenoRow() takes a reference
    // to the caller's data pointer plus the number of bytes per marker;
    // it relies on the private _loopGenoIn / _curLoopMarker /
    // _curOmitIdx state declared at the bottom of this class.
    // NOTE(review): the meaning of the int return value is not visible
    // here -- confirm against the implementation.
    static int readGenoRow(uint8_t *&data, int bytesPerMarker);
    static void closeGeno();

    // Output routines.  Each one receives the destination stream
    // (a FILE* or, for the Gz variants, a zlib gzFile).  The output
    // format is presumably the one named by the method -- confirm
    // against the implementation.
    static void printEigenstratGeno(FILE *out);
    static void printEigenstratPhased(FILE *out, int numSamples = -1);
    static void printGzEigenstratPhased(gzFile out);
    static void printPed(FILE *out);
    static void printPhasedIndFile(FILE *out, bool trioDuoOnly = false);
    static void printImpute2Haps(FILE *out);
    static void printGzImpute2Haps(gzFile out);
    static void printImpute2SampleFile(FILE *out, bool trioDuoOnly = false);

    // Optional teardown, compiled only when FORCE_FREE is defined.
    // It is normally unnecessary: the individuals are only deleted when
    // the program is done, and the OS reclaims the memory at that point.
#ifdef FORCE_FREE
    static void cleanUp() {
      // Delete every entry of P::_allIndivs, first to last.
      const int total = P::_allIndivs.length();
      int idx = 0;
      while (idx < total) {
        delete P::_allIndivs[idx];
        ++idx;
      }
    }
#endif

  private:
    // ---------------------------------------------------------------
    // Private static helpers
    // ---------------------------------------------------------------

    // Genotype file type detection.
    static int getGenoFileType(FILE *genoIn, bool phased, FILE *outs[2]);

    // Reading and constructing individuals.
    static void readIndivs(FILE *in, FILE *log, bool phased);
    static bool readPedOrFamFile(FILE *in,
                                 bool omitFamilyId,
                                 bool knowIsFam = false);
    static void makePersonsFromIds(char **ids, uint32_t numIds);
    static void parsePedGenotypes(FILE *in, P *thePerson);
    static void findRelationships(FILE *in,
                                  FILE *log,
                                  bool omitFamilyId,
                                  int *numMendelError,
                                  int *numMendelCounted,
                                  bool createMissingParents);
    static void removeIgnoreIndivs();

    // Per-format genotype parsers.
    static void parsePackedAncestryMapFormat(FILE *in);
    static void parseEigenstratFormat(FILE *in, bool phased);
    static void parsePlinkBedFormat(FILE *in, FILE *outs[2]);
    static void readPlinkBedBulk(FILE *in, FILE *outs[2]);
    static void checkPlinkHeader(FILE *in, FILE *outs[2]);
#ifdef VCF
    static void parseVCFGenotypes(htsFile *vcfIn,
                                  tbx_t *index,
                                  hts_itr_t *itr,
                                  const char *vcfFile,
                                  FILE *outs[2]);
#endif
    static void parsePackedGenotypes(FILE *in,
                                     int recordLen,
                                     char *buf,
                                     int numIndivs,
                                     int type);

    // ---------------------------------------------------------------
    // Private static state
    // ---------------------------------------------------------------

    // State kept between calls to readGenoRow().
    static FILE *_loopGenoIn;
    static int _curLoopMarker;
    static int _curOmitIdx;
};
#endif // PERSONIO_H