forked from opentargets/genetics-v2d-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_calculate_LD_table.Snakefile
63 lines (49 loc) · 1.36 KB
/
2_calculate_LD_table.Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env snakemake
'''
Makes:
1. LD table
'''
from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider
from datetime import date
import sys
import pandas as pd
# Read the workflow settings from the YAML file into `config`
configfile: "configs/config.yaml"

# NOTE(review): neither name below is used in the visible part of this file;
# presumably the included workflow reads them - confirm before renaming.
KEEP_LOCAL = False
tmpdir = config['temp_dir']

# If no version was configured, fall back to today's date formatted as YYMMDD
config.setdefault('version', date.today().strftime("%y%m%d"))
#
# Load LD manifest and variant ID LUT
#
in_manifest = 'output/{version}/ld_analysis_input.tsv'.format(
    version=config['version']
)

# Read the tab-separated manifest (first row is the header), loading every
# column with the object dtype instead of letting pandas infer types
manifest = pd.read_csv(in_manifest, sep='\t', header=0, dtype='object')

# Only keep chromosomes in 1000G: 1-22 plus X
valid_chroms = [str(number) for number in range(1, 23)]
valid_chroms.append('X')
# valid_chroms = ['22']  # DEBUG

# Compare chromosomes as strings, then drop rows on any other chromosome
manifest['chrom'] = manifest['chrom'].astype(str)
manifest = manifest[manifest['chrom'].isin(valid_chroms)]
# manifest = manifest.iloc[:20, :]  # DEBUG

# Distinct variant IDs that remain after the chromosome filter
varid_list = manifest['variant_id'].unique().tolist()
#
# Make main LD target and workflow
#
targets = []
# # Make targets for ld table
targets.append(
'output/{version}/ld.parquet'.format(version=config['version']) )
# Trigger making of targets
rule all:
input:
targets
# Add workflows
include: 'snakefiles/ld_table_2.Snakefile'