-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
120 lines (81 loc) · 3.52 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# **************************************
# * metagenomics / metatranscriptomics *
# **************************************
# Switch for the SortMeRNA step, written as a canonical lowercase YAML boolean.
# NOTE(review): presumably read by the Snakefile to decide whether the step
# runs — confirm against the workflow.
Sortmerna_run: false
# *********
# * metqc *
# *********
# File containing the list of files.
list_files: 'list_files.txt'
# Input directory: path to the raw sequences.
input_dir: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/metatranscriptomics/data'
# Output directory: path where output files are written.
output_dir: 'output'
# Where the snakemake folder is located.
path: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/metatranscriptomics/analysis'
# Filename suffix of the forward reads.
forward_read_suffix: '_1.fastq.gz'
# Filename suffix of the reverse reads.
reverse_read_suffix: '_2.fastq.gz'
### Cutadapt parameters
# Number of CPU cores to use for cutadapt.
num_cpus: 7
# Whether to run cutadapt (canonical lowercase YAML boolean).
run_cutadapt: true
# Adapter sequences for cutadapt (forward and reverse; currently identical).
fwd_adapter: CTGTCTCTTATACACATCT
rev_adapter: CTGTCTCTTATACACATCT
### Prinseq parameters
# NOTE(review): these keys appear to mirror prinseq-lite's trimming and
# low-complexity options of similar names — confirm against the Snakefile.
trimleft: 0
trimright: 0
trim_qual_left: 30
trim_qual_right: 30
trim_qual_window: 10
trim_qual_step: 2
trim_qual_type: 'mean'
trim_qual_rule: 'lt'
lc_method: 'dust'
lc_threshold: 7
# Minimum length for reads.
minlength: 60
# Maximum number of N bases allowed.
maxn: 15
### BMtagger Parameters
# Whether to run bmtagger (canonical lowercase YAML boolean).
run_bmtagger: true
# Bitmask index file for bmfilter (part of bmtagger).
bmfilter_ref: '/bulk/IMCshared_bulk/shared/dbs/bmtaggerDB/hg19_rRNA_mito_Hsapiens_rna/hg19_rRNA_mito_Hsapiens_rna_reference.bitmask'
# Index for srprism (part of bmtagger).
srprism_ref: '/bulk/IMCshared_bulk/shared/dbs/bmtaggerDB/hg19_rRNA_mito_Hsapiens_rna/hg19_rRNA_mito_Hsapiens_rna_reference.srprism'
# *************
# * sortmerna *
# *************
# Thread count listed under the sortmerna section.
threads: 8
# Reference databases for sortmerna: SILVA FASTA files for archaeal,
# bacterial and eukaryotic rRNA, as named in the paths below.
ref1: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/sortmerna/silva-arc-16s-id95.fasta'
ref2: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/sortmerna/silva-arc-23s-id98.fasta'
ref3: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/sortmerna/silva-bac-16s-id90.fasta'
ref4: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/sortmerna/silva-bac-23s-id98.fasta'
ref5: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/sortmerna/silva-euk-18s-id95.fasta'
ref6: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/sortmerna/silva-euk-28s-id98.fasta'
# *************
# * metaphlan *
# *************
# Metaphlan bowtie2 database directory.
metaphlan_database: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metaphlan'
# TSV file mapping SGB to GTDB, as the key and file name indicate.
sgb_to_gtdb_tsv_file: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metaphlan/mpa_vJan21_CHOCOPhlAnSGB_202103_SGB2GTDB.tsv'
# ***************
# * metannotate *
# ***************
# Location of the nucleotide database.
nuc_db: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/chocophlan'
# Location of the protein database.
prot_db: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/uniref'
# Locations of the mapping files.
uniref90_name: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/utility_mapping/map_uniref90_name.txt.bz2'
eggnog_uniref90: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/utility_mapping/map_eggnog_uniref90.txt.gz'
ko_uniref90: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/utility_mapping/map_ko_uniref90.txt.gz'
eggnog_name: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/utility_mapping/map_eggnog_name.txt.gz'
ko_name: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/utility_mapping/map_ko_name.txt.gz'
# NOTE(review): uniref_name points at the same file as uniref90_name above —
# confirm that both keys are still consumed by the workflow.
uniref_name: '/bulk/IMCbinf_bulk/sbagheri/Projects_IMC/databases/metannotate/utility_mapping/map_uniref90_name.txt.bz2'