Skip to content

Commit

Permalink
refactor: separate qc modules
Browse files Browse the repository at this point in the history
  • Loading branch information
dnousome committed Dec 5, 2024
1 parent dd35373 commit 0735099
Show file tree
Hide file tree
Showing 17 changed files with 710 additions and 33 deletions.
33 changes: 33 additions & 0 deletions modules/local/bcftools_stats.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

process bcftools_stats {
    /*
    Quality-control step to collect summary statistics from bcftools stats.
    When bcftools stats is run with one VCF file then stats by non-reference
    allele frequency, depth distribution, stats by quality and per-sample
    counts, singleton statistics are calculated. Please see bcftools'
    documentation for more information:
    http://samtools.github.io/bcftools/bcftools.html#stats
    @Input:
        Per sample gVCF file and its tabix index (scatter)
    @Output:
        Text file containing a collection of summary statistics
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.logan}"
    label 'process_medium'

    input:
    // The gVCF and its index are staged under sample-derived names so the
    // script can refer to them without knowing the upstream file names.
    tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi")

    output:
    path("${samplename}.germline.bcftools_stats.txt")

    script:
    """
    bcftools stats ${samplename}.gvcf.gz > ${samplename}.germline.bcftools_stats.txt
    """

    stub:
    """
    touch ${samplename}.germline.bcftools_stats.txt
    """
}
2 changes: 1 addition & 1 deletion modules/local/deepsomatic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ process deepsomatic_tn_step1 {

input:
tuple val(tname), path(tbam), path(tbai),
val(nname), path(nbam), path(nbai),
val(nname), path(nbam), path(nbai),
path(bed)

output:
Expand Down
43 changes: 43 additions & 0 deletions modules/local/fastq_screen.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Module-level handle on the FastQ Screen configuration file. Kept so that
// anything referring to this module-level name keeps working; the process
// below resolves its own task-local copy.
FASTQ_SCREEN_CONF=file(params.fastq_screen_conf)

process fastq_screen {
    /*
    Screens the trimmed paired-end reads with FastQ Screen, using bowtie2
    on a subset of 1,000,000 reads.
    @Input:
        Trimmed R1/R2 FastQ files plus the fastp JSON/HTML reports (scatter).
        The fastp reports are staged but not read by the script.
    @Output:
        FastQ Screen html, png and txt reports for R1 and R2
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.loganqc}"
    label 'process_medium'

    input:
    tuple val(samplename),
        path("${samplename}.R1.trimmed.fastq.gz"),
        path("${samplename}.R2.trimmed.fastq.gz"),
        path("${samplename}.fastp.json"),
        path("${samplename}.fastp.html")

    output:
    tuple path("${samplename}.R1.trimmed_screen.html"),
        path("${samplename}.R1.trimmed_screen.png"),
        path("${samplename}.R1.trimmed_screen.txt"),
        path("${samplename}.R2.trimmed_screen.html"),
        path("${samplename}.R2.trimmed_screen.png"),
        path("${samplename}.R2.trimmed_screen.txt")

    script:
    // 'def' keeps the variable local to this task; an undeclared assignment
    // here would create a global shared across concurrently running tasks.
    def screen_conf = file(params.fastq_screen_conf)
    """
    fastq_screen --conf ${screen_conf} \
        --outdir . \
        --threads ${task.cpus} \
        --subset 1000000 \
        --aligner bowtie2 \
        --force \
        ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz
    """

    stub:
    """
    touch ${samplename}.R1.trimmed_screen.html ${samplename}.R1.trimmed_screen.png
    touch ${samplename}.R1.trimmed_screen.txt ${samplename}.R2.trimmed_screen.html
    touch ${samplename}.R2.trimmed_screen.png ${samplename}.R2.trimmed_screen.txt
    """
}
36 changes: 36 additions & 0 deletions modules/local/fastqc.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

process fastqc {
    /*
    Quality-control step to assess sequencing quality of each sample.
    FastQC generates a set of basic statistics to identify problems
    that can arise during sequencing or library preparation.
    @Input:
        Recalibrated BAM file and its index (scatter)
    @Output:
        FastQC report and zip file containing sequencing quality information
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.loganqc}"
    label 'process_medium'

    input:
    tuple val(samplename), path(bam), path(bai)

    output:
    tuple val(samplename), path("${samplename}_fastqc.html"), path("${samplename}_fastqc.zip")

    script:
    // FastQC names its reports after the input file with the trailing ".bam"
    // removed. Deriving that prefix from the staged file (instead of
    // hard-coding "<samplename>.bqsr") keeps the renames working for any
    // BAM naming scheme.
    def report_prefix = bam.baseName
    """
    mkdir -p fastqc
    fastqc -t ${task.cpus} \
        -f bam \
        -o fastqc \
        ${bam}
    mv fastqc/${report_prefix}_fastqc.html ${samplename}_fastqc.html
    mv fastqc/${report_prefix}_fastqc.zip ${samplename}_fastqc.zip
    """

    stub:
    """
    touch ${samplename}_fastqc.html ${samplename}_fastqc.zip
    """
}
25 changes: 25 additions & 0 deletions modules/local/fc_lane.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
process fc_lane {
    /*
    Runs the helper script given by params.get_flowcell_lanes on the first
    FastQ file of a sample and captures its standard output.
    @Input:
        Sample name and its FastQ file(s) (scatter); only the first file is read
    @Output:
        <samplename>.fastq.info.txt holding the helper script's output
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.logan}"
    label 'process_low'

    input:
    tuple val(samplename), path(fqs)

    output:
    tuple val(samplename),
        path("${samplename}.fastq.info.txt")

    script:
    // 'def' keeps the variable local to this task; an undeclared assignment
    // here would create a global shared across concurrently running tasks.
    def get_flowcell_lanes = file(params.get_flowcell_lanes)
    """
    python ${get_flowcell_lanes} \
        ${fqs[0]} \
        ${samplename} > ${samplename}.fastq.info.txt
    """

    stub:
    """
    touch ${samplename}.fastq.info.txt
    """
}
78 changes: 78 additions & 0 deletions modules/local/gatk_varianteval.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Reference genome and dbSNP resources for the selected genome build.
// These module-level names are also read by other processes in this file.
GENOMEREF=file(params.genomes[params.genome].genome)
DBSNP=file(params.genomes[params.genome].dbsnp) // e.g. dbsnp_138.hg38.vcf.gz


process gatk_varianteval {
    /*
    Quality-control step to calculate various quality control metrics from a
    variant callset. These metrics include the number of raw or filtered SNP
    counts; ratio of transition mutations to transversions; concordance of a
    particular sample's calls to a genotyping chip; number of singletons per
    sample. Please see GATK's documentation for more information:
    https://gatk.broadinstitute.org/hc/en-us/articles/360040507171-VariantEval
    @Input:
        Per sample gVCF file and its tabix index (scatter)
    @Output:
        Evaluation table containing a collection of summary statistics
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.logan}"
    label 'process_medium'

    input:
    tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi")

    output:
    path("${samplename}.germline.eval.grp")

    script:
    """
    gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \
        -R $GENOMEREF \
        -O ${samplename}.germline.eval.grp \
        --dbsnp $DBSNP \
        --eval ${samplename}.gvcf.gz
    """

    stub:
    """
    touch ${samplename}.germline.eval.grp
    """
}

process collectvariantcallmetrics {
    /*
    Quality-control step to collect summary metrics about snps and indels
    called in a multisample VCF file. Please see the Broad's documentation
    for more information about each field in the generated log file:
    https://broadinstitute.github.io/picard/picard-metric-definitions.html
    @Input:
        Multi-sample gVCF file and its index (indirect-gather-due-to-aggregation)
    @Output:
        Text files containing a collection of metrics relating to snps and indels
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.logan}"
    label 'process_medium'

    input:
    tuple path(germlinevcf), path(germlinetbi)

    output:
    tuple path("raw_variants.variant_calling_detail_metrics"),
        path("raw_variants.variant_calling_summary_metrics")

    script:
    // OUTPUT is the file prefix of the two declared metrics files. It must be
    // written as a single KEY=VALUE token: a space after '=' makes the shell
    // pass an empty OUTPUT= followed by a stray positional argument.
    // PICARDJARPATH is escaped so it is expanded by the shell at run time.
    // DBSNP is the module-level dbSNP file defined at the top of this file.
    """
    java -Xmx24g -jar \${PICARDJARPATH}/picard.jar \
        CollectVariantCallingMetrics \
        INPUT=${germlinevcf} \
        OUTPUT=raw_variants \
        DBSNP=$DBSNP Validation_Stringency=SILENT
    """

    stub:
    """
    touch raw_variants.variant_calling_detail_metrics raw_variants.variant_calling_summary_metrics
    """
}
54 changes: 54 additions & 0 deletions modules/local/kraken.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Kraken2 bacterial database for the selected genome build.
BACDB=file(params.genomes[params.genome].KRAKENBACDB)

process kraken {
    /*
    Quality-control step to assess for potential sources of microbial contamination.
    If there are high levels of microbial contamination, Kraken will provide an
    estimation of the taxonomic composition. Kraken is used in conjunction with
    Krona to produce an interactive report.
    @Input:
        Trimmed paired-end FastQ files (scatter)
    @Output:
        Kraken taxonomic report and interactive krona report
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.loganqc}"
    label 'process_high'

    input:
    tuple val(samplename),
        path(fqs)

    output:
    tuple val(samplename),
        //path("${samplename}.trimmed.kraken_bacteria.out.txt"),
        path("${samplename}.trimmed.kraken_bacteria.taxa.txt"),
        path("${samplename}.trimmed.kraken_bacteria.krona.html")

    script:
    """
    # Copy the kraken2 db into the task's working directory to reduce
    # shared filesystem strain, then classify against that local copy
    cp -rv $BACDB .
    kdb_base=\$(basename $BACDB)
    kraken2 --db ./\${kdb_base} \
        --threads ${task.cpus} --report ${samplename}.trimmed.kraken_bacteria.taxa.txt \
        --output - \
        --gzip-compressed \
        --paired ${fqs[0]} ${fqs[1]}
    # Generate Krona Report
    # Per-read output is suppressed above (--output -), so Krona is built
    # from the kraken2 report: column 5 holds the taxonomy ID and column 3
    # the number of reads assigned directly to that taxon.
    ktImportTaxonomy -t 5 -m 3 \
        -o ${samplename}.trimmed.kraken_bacteria.krona.html \
        ${samplename}.trimmed.kraken_bacteria.taxa.txt
    """

    stub:
    """
    touch ${samplename}.trimmed.kraken_bacteria.taxa.txt ${samplename}.trimmed.kraken_bacteria.krona.html
    """
}
41 changes: 41 additions & 0 deletions modules/local/mosdepth.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@

process mosdepth {
    /*
    Quality-control step to assess depth of coverage in 500 bp windows.
    @Input:
        Recalibrated BAM file and its index (scatter)
    @Output:
        Files mosdepth can write (only those in the output block are captured):
        `{prefix}.mosdepth.global.dist.txt`
        `{prefix}.mosdepth.summary.txt`
        `{prefix}.mosdepth.region.dist.txt` (if --by is specified)
        `{prefix}.per-base.bed.gz|per-base.d4` (unless -n/--no-per-base is specified)
        `{prefix}.regions.bed.gz` (if --by is specified)
        `{prefix}.quantized.bed.gz` (if --quantize is specified)
        `{prefix}.thresholds.bed.gz` (if --thresholds is specified)
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.loganqc}"
    label 'process_medium'

    input:
    tuple val(samplename), path(bam), path(bai)

    output:
    tuple path("${samplename}.mosdepth.region.dist.txt"),
        path("${samplename}.mosdepth.summary.txt"),
        path("${samplename}.regions.bed.gz"),
        path("${samplename}.regions.bed.gz.csi")

    script:
    // Options are placed ahead of the <prefix> <BAM> positionals, matching
    // mosdepth's documented usage line.
    """
    mosdepth -n --fast-mode --by 500 -t ${task.cpus} ${samplename} ${bam}
    """

    stub:
    """
    touch "${samplename}.mosdepth.region.dist.txt"
    touch "${samplename}.mosdepth.summary.txt"
    touch "${samplename}.regions.bed.gz"
    touch "${samplename}.regions.bed.gz.csi"
    """
}
36 changes: 36 additions & 0 deletions modules/local/multiqc.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

process multiqc {
    /*
    Reporting step to aggregate sample summary statistics and quality-control
    information across all samples. This will be one of the last steps of the
    pipeline. The inputs listed here are to ensure that this step runs last.
    During runtime, MultiQC will recursively crawl through the working directory
    and parse files that it supports.
    @Input:
        List of files to ensure this step runs last (gather)
    @Output:
        Interactive MultiQC report
    */
    // Process directives take no '=' (assignment form is config-file syntax).
    container "${params.containers.multiqc}"
    label 'process_low'

    input:
    path(allqcin)

    output:
    path("MultiQC_Report.html")

    script:
    // No trailing backslash after the last option: a dangling continuation
    // at the end of the command is at best a no-op.
    """
    multiqc . \
        -f --interactive \
        -n "MultiQC_Report.html"
    """

    stub:
    """
    touch MultiQC_Report.html
    """
}
Loading

0 comments on commit 0735099

Please sign in to comment.