-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathper_cg_methylation.py
68 lines (40 loc) · 1.4 KB
/
per_cg_methylation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#! /usr/bin/env python
import math
import sys
import csv
import re
from methylation_parsers import MethyltestRecord
import argparse
def update_meths(loc_tuple,meth, trinuc):
if loc_tuple not in bigtab:
##coverage first, then num methylated
bigtab[loc_tuple]=[1,meth, trinuc]
else:
bigtab[loc_tuple][0] += 1
bigtab[loc_tuple][1] += meth
parser = argparse.ArgumentParser( description='Take in per CG TSV file and summarize methylation information at each position as a bismark style cov file')
parser.add_argument('-i', '--input', type=str, required=False)
args=parser.parse_args()
if args.input:
in_file = open(args.input)
else:
in_file = sys.stdin
reader=csv.reader(in_file, delimiter='\t')
next(reader, None) ##skip header
##Generate bedMethyl file
outfile=args.input
outfile=outfile.replace('.phase.tsv', '.cyto.txt')
tab=open(outfile, mode='w')
tabwriter=csv.writer(tab, delimiter='\t')
##Coverage table and methylated table, indexed by tuples of chr and coord
bigtab = dict()
for line in reader:
location=(line[0], int(line[1]))
meth=int(line[5])
if (meth != 0):
meth=(meth+1)/2
update_meths(location, meth, line[6])
for key, value in bigtab.iteritems():
##Bismark cytosine report style file
tabwriter.writerow([key[0], key[1]+1, "*", value[1], value[0]-value[1], "CG", value[2]])
tab.close()