diff --git a/bin/freec_paired.pl b/bin/make_freec_genome_paired.pl similarity index 95% rename from bin/freec_paired.pl rename to bin/make_freec_genome_paired.pl index 161e24e..474dfaf 100644 --- a/bin/freec_paired.pl +++ b/bin/make_freec_genome_paired.pl @@ -26,18 +26,19 @@ print C "chrFiles = $chrFiles\n"; print C "minimalSubclonePresence = 20\nmaxThreads = 8\n"; print C "outputDir = $ARGV[0]\n\n"; - + print C '[sample]' . "\n\n"; - + print C "mateFile = $tumormateFile\n"; print C "inputFormat = BAM\nmateOrientation = FR\n\n"; -print C '[BAF]' . "\n\n"; +print C '[control]' . "\n\n"; print C "mateFile = $controlmateFile\n"; print C "inputFormat = BAM\nmateOrientation = FR\n\n"; - + +print C '[BAF]' . "\n\n"; print C "makePileup = $makePileup\n"; print C "fastaFile = $fastaFile\n"; print C "minimalCoveragePerPosition = 20\nminimalQualityPerPosition = 20\n"; -print C "SNPfile = $SNPfile"; +print C "SNPfile = $SNPfile"; \ No newline at end of file diff --git a/conf/base.config b/conf/base.config index c525125..0eb1dee 100644 --- a/conf/base.config +++ b/conf/base.config @@ -51,6 +51,21 @@ process { withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + withLabel:process_somaticcaller { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 64.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + } + withLabel:process_somaticcaller_high { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 70.GB * task.attempt, 'memory' ) } + time = { check_max( 48.h * task.attempt, 'time' ) } + } + withLabel:process_highmem { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 48.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + } withLabel:error_ignore { errorStrategy = 'ignore' } @@ -58,4 +73,5 @@ process { errorStrategy = 'retry' maxRetries = 2 } + } diff --git a/conf/biowulf.config b/conf/biowulf.config 
index a679cf1..77a06d5 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -22,7 +22,7 @@ singularity { autoMounts = true cacheDir = "/data/CCBR_Pipeliner/SIFS" envWhitelist = 'https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' } env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" @@ -34,4 +34,4 @@ process { stageOutMode = 'rsync' // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps cache = 'lenient' -} \ No newline at end of file +} diff --git a/conf/containers.config b/conf/containers.config index 504d3b8..2ceaf3c 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -2,6 +2,9 @@ params { containers { base = 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' - logan = 'docker://dnousome/ccbr_logan_base:v0.3.0' + logan = 'docker://dnousome/ccbr_logan_base:v0.3.3' + vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0' + octopus = 'docker://dancooke/octopus:latest' + } } diff --git a/conf/genomes.config b/conf/genomes.config index 2ee6cdc..3d0843a 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -1,11 +1,11 @@ params { genomes { 'hg38' { - genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" - genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" + genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" + genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" bwagenome= 
"/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.fasta" genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" - wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" + wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // @@ -14,7 +14,7 @@ params { dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_indel.vcf" gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz - pon = "/data/nousomedr/wgs/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} + pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} kgp = 
"/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz" KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh38.86" @@ -29,11 +29,11 @@ params { octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] - } + } 'mm10' { - genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) - genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) + genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) + genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa" genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict" intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed" @@ -66,6 +66,3 @@ params { } } } - - - diff --git a/conf/interactive.config b/conf/interactive.config index 725d1ae..9808cb5 100644 --- a/conf/interactive.config +++ b/conf/interactive.config @@ -4,7 +4,7 @@ params { max_cpus = 56 max_time = '12 h' - + } process.scratch = false @@ -14,4 +14,6 @@ singularity { autoMounts = true cacheDir = "/data/CCBR_Pipeliner/SIFS" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID,SINGULARITY_BINDPATH' -} \ No newline at end of file +} 
+ +env.SINGULARITY_CACHEDIR = "/data/CCBR_Pipeliner/SIFS" diff --git a/conf/modules.config b/conf/modules.config index 4c3522c..d171cef 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -9,27 +9,22 @@ process { errorStrategy = 'finish' withName:'sequenza' { - container = 'dnousome/ccbr_logan_base:v0.3.0' publishDir = [ path: { "${params.outdir}/cnv/sequenza" }, mode: 'copy' ] } - withName: 'freec_paired' { publishDir = [ - path: { "${params.outdir}/cnv/freec" }, + path: { "${params.outdir}/cnv/freec_paired" }, mode: 'copy' ] - container = 'dnousome/ccbr_logan_base:v0.3.0' - } - withName:'freec' { - container = 'dnousome/ccbr_logan_base:v0.3.0' + withName: 'freec' { publishDir = [ - path: { "${params.outdir}/cnv/freec" }, + path: { "${params.outdir}/cnv/freec_unpaired" }, mode: 'copy' ] } @@ -192,7 +187,7 @@ process { mode: 'copy' ] } - + withName: 'annotsv_tn' { publishDir = [ path: { "${params.outdir}/SV/annotated" }, @@ -291,5 +286,6 @@ process { path: { "${params.outdir}/vcfs/combined" }, mode: 'copy' ] - } + } + } diff --git a/main.nf b/main.nf index f2c2802..0a715e1 100644 --- a/main.nf +++ b/main.nf @@ -5,14 +5,12 @@ date = new Date().format( 'yyyyMMdd' ) log.info """\ - L O G A N P I P E L I N E + L O G A N P I P E L I N E ============================= genome: ${params.genome} outdir: ${params.outdir} Sample Sheet: ${params.sample_sheet} - Samples: ${params.fastq_input} ${params.file_input} ${params.bam_input} - NF version : $nextflow.version - + Samples: ${params.fastq_input} ${params.fastq_file_input} ${params.bam_input} ${params.bam_file_input} """ .stripIndent() @@ -40,8 +38,7 @@ workflow.onComplete { //Final Workflow workflow { - DETERMINEBAM() - if ([params.fastq_input,params.file_input].any() && params.sample_sheet && !params.BAMINPUT){ + if ([params.fastq_input,params.fastq_file_input].any() && params.sample_sheet){ println "Tumor-Normal FASTQ" INPUT() ALIGN(INPUT.out.fastqinput,INPUT.out.sample_sheet) @@ -52,10 +49,10 @@ workflow { 
//Tumor-Normal VC, SV, CNV if (params.vc){ VC(ALIGN.out.bamwithsample,ALIGN.out.splitout,ALIGN.out.sample_sheet) - } + } if (params.sv){ SV(ALIGN.out.bamwithsample) - } + } if (params.cnv){ if (params.genome == "mm10"){ CNVmouse(ALIGN.out.bamwithsample) @@ -67,25 +64,25 @@ workflow { CNVhuman(ALIGN.out.bamwithsample,VC.out.somaticcall_input) } } - } + } if (params.qc && params.gl){ QC_GL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout,GL.out.glnexusout,GL.out.bcfout) } else if (params.qc){ QC_NOGL(ALIGN.out.fastqin,ALIGN.out.fastpout,ALIGN.out.bqsrout) - } + } } - + //TUMOR-NOMRAL BAM INPUT - if ([params.bam_input,params.file_input].any() && params.sample_sheet && BAMINPUT){ + if ([params.bam_input,params.bam_file_input].any() && params.sample_sheet){ println "Tumor-Normal with BAMs" INPUT_BAM() if (params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) - } + } if (params.sv){ SV(INPUT_BAM.out.bamwithsample) - } + } if (params.cnv){ if (params.genome == "mm10"){ CNVmouse(INPUT_BAM.out.bamwithsample) @@ -93,15 +90,15 @@ workflow { if (!params.vc){ VC(INPUT_BAM.out.bamwithsample,INPUT_BAM.out.splitout,INPUT_BAM.out.sample_sheet) CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) - }else { + }else { CNVhuman(INPUT_BAM.out.bamwithsample,VC.out.somaticcall_input) } } } - } - + } + ///Tumor Only Pipelines - if ([params.fastq_input,params.file_input].any() && !params.sample_sheet && !params.BAMINPUT){ + if ([params.fastq_input,params.fastq_file_input].any() && !params.sample_sheet){ println "Tumor-Only FASTQ" INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) @@ -118,7 +115,7 @@ workflow { if (!params.vc){ VC_TONLY(ALIGN_TONLY.out.bamwithsample,ALIGN_TONLY.out.splitout,ALIGN_TONLY.out.sample_sheet) CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) - } else{ + } else { CNVhuman_tonly(ALIGN_TONLY.out.bamwithsample,VC_TONLY.out.somaticcall_input) } } @@ -129,7
+126,7 @@ workflow { } //Variant Calling from BAM-Tumor Only Mode - if ([params.bam_input,params.file_input].any() && !params.sample_sheet && params.BAMINPUT){ + if ([params.bam_input,params.bam_file_input].any() && !params.sample_sheet){ println "Tumor-Only BAM" INPUT_TONLY_BAM() if (params.vc){ @@ -137,20 +134,19 @@ workflow { } if (params.sv){ SV_TONLY(INPUT_TONLY_BAM.out.bamwithsample) - } + } if (params.cnv){ if (params.genome == "mm10"){ CNVmouse_tonly(INPUT_TONLY_BAM.out.bamwithsample) } else if (params.genome== "hg38"){ - VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) - CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) - } - } - - } + if (!params.vc){ + VC_TONLY(INPUT_TONLY_BAM.out.bamwithsample,INPUT_TONLY_BAM.out.splitout,INPUT_TONLY_BAM.out.sample_sheet) + CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } else { + CNVhuman_tonly(INPUT_TONLY_BAM.out.bamwithsample,VC_TONLY.out.somaticcall_input) + } + } + } + } } - - - - diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 757465a..0f4a92b 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -21,12 +21,13 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data' DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv' HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz' -//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) +//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) //ascatR= -//mm10 Paired-Sequenza, FREEC-tumor only +//mm10 Paired-Sequenza, FREEC-tumor only process seqz_sequenza_bychr { + container = "${params.containers.logan}" label 'process_low' input: @@ -57,13 +58,15 @@ process seqz_sequenza_bychr { process sequenza { + container = "${params.containers.logan}" + label 'process_highcpu' input: tuple val(pairid), path(seqz) output: - tuple val(pairid), + tuple 
val(pairid), path("${pairid}_alternative_solutions.txt"), path("${pairid}_alternative_fit.pdf"), path("${pairid}_model_fit.pdf"), @@ -83,9 +86,9 @@ process sequenza { //samtools mpileup ${normal} -f $GENOMEREF -Q 20 |gzip > ${normalname}.mpileup.gz //sequenza-utils seqz_binning --seqz --window 50 -o ${sample}_bin50.seqz.gz - shell: + shell: ''' - + zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ sequenza-utils seqz_binning \ -w 100 \ @@ -99,11 +102,11 @@ process sequenza { ''' - stub: - + stub: + """ - touch "${pairid}_alternative_solutions.txt" - touch "${pairid}_alternative_fit.pdf" + touch "${pairid}_alternative_solutions.txt" + touch "${pairid}_alternative_fit.pdf" touch "${pairid}_model_fit.pdf" touch "${pairid}_confints_CP.txt" touch "${pairid}_CN_bars.pdf" @@ -121,13 +124,25 @@ process sequenza { } + process freec_paired { + container = "${params.containers.logan}" label 'process_highcpu' input: - tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai) - shell: """ + output: + tuple val(tumorname), val(normalname), + path("${tumorname}_vs_${normalname}_CNVs.p.value.txt"), + path("${tumorname}_vs_${normalname}_ratio.txt"), + path("${tumorname}_vs_${normalname}_BAF.txt"), + path("${tumorname}_vs_${normalname}_ratio.txt.log2.png"), + path("${tumorname}_vs_${normalname}_ratio.txt.png") + + shell: + """ perl $FREECPAIR_SCRIPT \ . 
\ @@ -150,29 +165,47 @@ process freec_paired { cat $FREECPLOT | \ R --slave \ --args 2 \ - ${tumorname}_vs_${normalname}.bam_ratio.txt \ - ${tumorname}_vs_${normalname}.bam_BAF.txt + ${tumor}_ratio.txt \ + ${tumor}_BAF.txt + + mv ${tumor}_CNVs.p.value.txt ${tumorname}_vs_${normalname}_CNVs.p.value.txt + mv ${tumor}_ratio.txt ${tumorname}_vs_${normalname}_ratio.txt + mv ${tumor}_BAF.txt ${tumorname}_vs_${normalname}_BAF.txt + mv ${tumor}_BAF.txt.png ${tumorname}_vs_${normalname}_BAF.txt.png + mv ${tumor}_ratio.txt.log2.png ${tumorname}_vs_${normalname}_ratio.txt.log2.png + mv ${tumor}_ratio.txt.png ${tumorname}_vs_${normalname}_ratio.txt.png - """ + """ stub: """ - touch ${tumorname}_vs_${normalname}.bam_CNVs.p.value.txt - touch ${tumorname}_vs_${normalname}.bam_ratio.txt - touch ${tumorname}_vs_${normalname}.bam_BAF.txt - touch ${tumorname}_vs_${normalname}.bam_ratio.txt.log2.png - touch ${tumorname}_vs_${normalname}.bam_ratio.txt.png + touch ${tumorname}_vs_${normalname}_CNVs.p.value.txt + touch ${tumorname}_vs_${normalname}_ratio.txt + touch ${tumorname}_vs_${normalname}_BAF.txt + touch ${tumorname}_vs_${normalname}_BAF.txt.png + touch ${tumorname}_vs_${normalname}_ratio.txt.log2.png + touch ${tumorname}_vs_${normalname}_ratio.txt.png """ } process freec { - label 'process_mid' + container = "${params.containers.logan}" + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai) + output: + tuple val(tumorname), + path("${tumorname}_CNVs.p.value.txt"), + path("${tumorname}_ratio.txt"), + path("${tumorname}_BAF.txt"), + path("${tumorname}_ratio.txt.log2.png"), + path("${tumorname}_ratio.txt.png") + + shell: """ perl $FREECSCRIPT \ @@ -198,26 +231,36 @@ process freec { ${tumor}_ratio.txt \ ${tumor}_BAF.txt - """ + mv ${tumor}_CNVs.p.value.txt ${tumorname}_CNVs.p.value.txt + mv ${tumor}_ratio.txt ${tumorname}_ratio.txt + mv ${tumor}_BAF.txt ${tumorname}_BAF.txt + mv ${tumor}_BAF.txt.png ${tumorname}_BAF.txt.png + mv ${tumor}_ratio.txt.log2.png 
${tumorname}_ratio.txt.log2.png + mv ${tumor}_ratio.txt.png ${tumorname}_ratio.txt.png + + """ stub: """ - touch ${tumor}_CNVs.p.value.txt - touch ${tumor}_ratio.txt - touch ${tumor}_BAF.txt - touch ${tumor}_ratio.txt.log2.png - touch ${tumor}_ratio.txt.png + touch ${tumorname}_CNVs.p.value.txt + touch ${tumorname}_ratio.txt + touch ${tumorname}_BAF.txt + touch ${tumorname}_BAF.txt.png + touch ${tumorname}_ratio.txt.log2.png + touch ${tumorname}_ratio.txt.png """ } process amber_tonly { - label 'process_mid' + container = "${params.containers.logan}" + + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai) - + output: tuple val(tumorname), path("${tumorname}_amber") @@ -243,13 +286,15 @@ process amber_tonly { """ mkdir ${tumorname}_amber - touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc + touch ${tumorname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_amber/${tumorname}.amber.qc """ } process amber_tn { - label 'process_mid' - + container = "${params.containers.logan}" + + label 'process_medium' + input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) @@ -279,19 +324,21 @@ process amber_tn { """ mkdir ${tumorname}_vs_${normalname}_amber - touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc + touch ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.tsv.gz ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.baf.pcf ${tumorname}_vs_${normalname}_amber/${tumorname}.amber.qc """ } process cobalt_tonly { - label "process_mid" + container = "${params.containers.logan}" + + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai) output: tuple val(tumorname), 
path("${tumorname}_cobalt") - //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), + //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), //path("${samplename}/${samplename}.cobalt.ratio.pcf"), //path("${samplename}/${samplename}.cobalt.gc.median.tsv") @@ -317,7 +364,9 @@ process cobalt_tonly { } process cobalt_tn { - label "process_mid" + container = "${params.containers.logan}" + + label 'process_medium' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -325,7 +374,7 @@ process cobalt_tn { output: tuple val(tumorname), path("${tumorname}_vs_${normalname}_cobalt") - //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), + //path("${samplename}/${samplename}.cobalt.ratio.tsv.gz"), //path("${samplename}/${samplename}.cobalt.ratio.pcf"), //path("${samplename}/${samplename}.cobalt.gc.median.tsv") @@ -353,12 +402,12 @@ process cobalt_tn { process purple { - label 'process_mid' - publishDir("${outdir}/cnv/purple", mode: 'copy') + container = "${params.containers.logan}" + label 'process_medium' input: tuple val(tumorname), - path(cobaltin), + path(cobaltin), path(amberin), path(somaticvcf), path(somaticvcfindex) @@ -396,8 +445,6 @@ process purple { process ascat_tn { module=["java/12.0.1","R/3.6.3"] - publishDir("${outdir}/purple", mode: 'copy') - input: tuple val(samplename), path(cobaltin), path(amberin), path("${samplename}.tonly.final.mut2.vcf.gz") @@ -436,4 +483,3 @@ process ascat_tn { } */ - diff --git a/modules/local/germline.nf b/modules/local/germline.nf index c106683..285a0f3 100644 --- a/modules/local/germline.nf +++ b/modules/local/germline.nf @@ -5,15 +5,15 @@ MODEL="/opt/models/wgs/model.ckpt" //Processes //Deep Variant process deepvariant_step1 { - + input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) - + output: - tuple val(samplename), path("outputshard/${samplename}.tfrecord_${bed}.gz"), + tuple val(samplename), path("outputshard/${samplename}.tfrecord_${bed}.gz"), 
path("gvcf/${samplename}.gvcf.tfrecord_${bed}.gz") - script: + script: """ mkdir -p outputshard mkdir -p gvcf @@ -24,7 +24,7 @@ process deepvariant_step1 { --reads ${samplename}.bam \ --channels insert_size \ --examples outputshard/${samplename}.tfrecord_${bed}.gz \ - --gvcf gvcf/${samplename}.gvcf.tfrecord_${bed}.gz + --gvcf gvcf/${samplename}.gvcf.tfrecord_${bed}.gz """ stub: @@ -32,23 +32,22 @@ process deepvariant_step1 { mkdir -p outputshard mkdir -p gvcf touch outputshard/${samplename}.tfrecord_${bed}.gz - touch gvcf/${samplename}.gvcf.tfrecord_${bed}.gz + touch gvcf/${samplename}.gvcf.tfrecord_${bed}.gz """ } //Step 2 requires GPU process deepvariant_step2 { - - + input: tuple val(samplename), path(tfrecords), path(tfgvcf) - + output: - tuple val(samplename), path(tfrecords), + tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"), path(tfgvcf) - script: + script: """ call_variants \ @@ -69,17 +68,16 @@ process deepvariant_step2 { //Step 3 DV process deepvariant_step3 { - input: tuple val(samplename), path(tfrecords), path("${samplename}_call_variants_output.tfrecord.gz"), path(tfgvcf) - + output: tuple val(samplename), path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi"), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") - script: + script: """ postprocess_variants \ --ref $GENOMEREF \ @@ -101,16 +99,15 @@ process deepvariant_step3 { //Combined DeepVariant process deepvariant_combined { - input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") - + output: tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi"), path("${samplename}.vcf.gz"), path("${samplename}.vcf.gz.tbi") - script: + script: """ run_deepvariant \ --model_type=WGS \ @@ -118,7 +115,7 @@ process deepvariant_combined { --reads=${samplename}.bam \ --output_gvcf= ${samplename}.gvcf.gz \ --output_vcf=${samplename}.vcf.gz \ - --num_shards=16 + --num_shards=16 """ @@ -126,7 
+123,7 @@ process deepvariant_combined { """ touch ${samplename}.vcf.gz ${samplename}.vcf.gz.tbi touch ${samplename}.gvcf.gz ${samplename}.gvcf.gz.tbi - + """ @@ -134,20 +131,19 @@ process deepvariant_combined { process glnexus { - input: path(gvcfs) - + output: - tuple path("germline.v.bcf"), + tuple path("germline.v.bcf"), path("germline.norm.vcf.gz"),path("germline.norm.vcf.gz.tbi") - script: + script: """ glnexus_cli --config DeepVariant_unfiltered \ *.gvcf.gz --threads 8 > germline.v.bcf - + bcftools norm \ -m - \ -Oz \ @@ -160,18 +156,13 @@ process glnexus { -f -t \ --threads 8 \ germline.norm.vcf.gz - + """ stub: """ touch germline.v.bcf - touch germline.norm.vcf.gz + touch germline.norm.vcf.gz touch germline.norm.vcf.gz.tbi """ } - - - - - diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 82bcc1a..f8bbc89 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -13,19 +13,21 @@ ANCESTRY_DB=file(params.genomes[params.genome].somalier_ancestrydb) SCRIPT_PATH_GENDER = file(params.script_genderPrediction) SCRIPT_PATH_SAMPLES = file(params.script_combineSamples) SCRIPT_PATH_PCA = file(params.script_ancestry) - -//OUTPUT DIRECTORY + +//OUTPUT DIRECTORY process fc_lane { + container = "${params.containers.logan}" + label 'process_low' input: tuple val(samplename), path(fqs) - output: + output: tuple val(samplename), - path("${samplename}.fastq.info.txt") - + path("${samplename}.fastq.info.txt") + script: GET_FLOWCELL_LANES=file(params.get_flowcell_lanes) @@ -35,7 +37,7 @@ process fc_lane { ${samplename} > ${samplename}.fastq.info.txt """ - stub: + stub: """ touch ${samplename}.fastq.info.txt """ @@ -61,7 +63,7 @@ process fastq_screen { path("${samplename}.R2.trimmed_screen.png"), path("${samplename}.R2.trimmed_screen.txt") - script: + script: FASTQ_SCREEN_CONF=file(params.fastq_screen_conf) """ @@ -75,7 +77,7 @@ process fastq_screen { """ - stub: + stub: """ touch ${samplename}.R1.trimmed_screen.html ${samplename}.R1.trimmed_screen.png touch 
${samplename}.R1.trimmed_screen.txt ${samplename}.R2.trimmed_screen.html @@ -92,11 +94,11 @@ process kraken { @Input: Trimmed FastQ files (scatter) @Output: - Kraken logfile and interative krona report + Kraken logfile and interactive krona report */ - + input: - tuple val(samplename), + tuple val(samplename), path(fqs) output: @@ -104,19 +106,19 @@ process kraken { //path("${samplename}.trimmed.kraken_bacteria.out.txt"), path("${samplename}.trimmed.kraken_bacteria.taxa.txt"), path("${samplename}.trimmed.kraken_bacteria.krona.html") - - script: + + script: """ #Setups temporary directory for - #intermediate files with built-in + #intermediate files with built-in #mechanism for deletion on exit - - + + # Copy kraken2 db to local node storage to reduce filesystem strain cp -rv $BACDB . kdb_base=\$(basename $BACDB) - + kraken2 --db $BACDB \ --threads 16 --report ${samplename}.trimmed.kraken_bacteria.taxa.txt \ --output - \ @@ -127,7 +129,7 @@ process kraken { ktImportTaxonomy - -o ${samplename}.trimmed.kraken_bacteria.krona.html """ - stub: + stub: """ touch ${samplename}.trimmed.kraken_bacteria.taxa.txt ${samplename}.trimmed.kraken_bacteria.krona.html """ @@ -155,18 +157,18 @@ process fastqc { //threads: 8 //module=['fastqc/0.11.9'] - script: + script: """ mkdir -p fastqc fastqc -t 8 \ -f bam \ -o fastqc \ - ${samplename}.bqsr.bam + ${samplename}.bqsr.bam mv fastqc/${samplename}.bqsr_fastqc.html ${samplename}_fastqc.html mv fastqc/${samplename}.bqsr_fastqc.zip ${samplename}_fastqc.zip """ - stub: + stub: """ touch ${samplename}_fastqc.html ${samplename}_fastqc.zip """ @@ -174,7 +176,7 @@ process fastqc { process qualimap_bamqc { /* - Quality-control step to assess various post-alignment metrics + Quality-control step to assess various post-alignment metrics and a secondary method to calculate insert size. 
Please see QualiMap's website for more information about BAM QC: http://qualimap.conesalab.org/ @@ -182,15 +184,15 @@ process qualimap_bamqc { Recalibrated BAM file (scatter) @Output: Report containing post-aligment quality-control metrics - */ + */ input: tuple val(samplename), path(bam), path(bai) - output: + output: tuple path("${samplename}_genome_results.txt"), path("${samplename}_qualimapReport.html") - script: + script: """ unset DISPLAY qualimap bamqc -bam ${bam} \ @@ -215,31 +217,31 @@ process qualimap_bamqc { process samtools_flagstats { /* - Quality-control step to assess alignment quality. Flagstat provides - counts for each of 13 categories based primarily on bit flags in the - FLAG field. Information on the meaning of the flags is given in the + Quality-control step to assess alignment quality. Flagstat provides + counts for each of 13 categories based primarily on bit flags in the + FLAG field. Information on the meaning of the flags is given in the SAM specification: https://samtools.github.io/hts-specs/SAMv1.pdf @Input: Recalibrated BAM file (scatter) @Output: Text file containing alignment statistics */ - label 'process_mid' - + label 'process_medium' + input: tuple val(samplename), path(bam), path(bai) - + output: path("${samplename}.samtools_flagstat.txt") - script: + script: """ samtools flagstat ${bam} > ${samplename}.samtools_flagstat.txt """ stub: """ - touch ${samplename}.samtools_flagstat.txt + touch ${samplename}.samtools_flagstat.txt """ } @@ -260,7 +262,7 @@ process mosdepth { */ input: tuple val(samplename), path(bam), path(bai) - + output: path("${samplename}.mosdepth.region.dist.txt"), path("${samplename}.mosdepth.summary.txt"), @@ -268,7 +270,7 @@ process mosdepth { path("${samplename}.regions.bed.gz.csi") - script: + script: """ mosdepth -n --fast-mode --by 500 ${samplename} ${bam} -t $task.cpus """ @@ -282,28 +284,28 @@ process mosdepth { """ } -process vcftools { +process vcftools { /* - Quality-control step to calculates a 
measure of heterozygosity on + Quality-control step to calculate a measure of heterozygosity on a per-individual basis. The inbreeding coefficient, F, is estimated for each individual using a method of moments. Please see VCFtools - documentation for more information: + documentation for more information: https://vcftools.github.io/man_latest.html @Input: Multi-sample gVCF file (indirect-gather-due-to-aggregation) @Output: Text file containing a measure of heterozygosity */ - label 'process_mid' + label 'process_medium' + - - input: + input: tuple path(germlinevcf),path(germlinetbi) - output: + output: path("variants_raw_variants.het") - - - script: + + + script: """ vcftools --gzvcf ${germlinevcf} --het --out variants_raw_variants """ @@ -323,16 +325,16 @@ process collectvariantcallmetrics { @Input: Multi-sample gVCF file (indirect-gather-due-to-aggregation) @Output: - Text file containing a collection of metrics relating to snps and indels - */ - input: + Text file containing a collection of metrics relating to snps and indels + */ + input: tuple path(germlinevcf),path(germlinetbi) - - output: + + output: tuple path("raw_variants.variant_calling_detail_metrics"), path("raw_variants.variant_calling_summary_metrics") - + script: """ java -Xmx24g -jar \${PICARDJARPATH}/picard.jar \ @@ -341,7 +343,7 @@ process collectvariantcallmetrics { OUTPUT= "raw_variants" \ DBSNP=$DBSNP Validation_Stringency=SILENT """ - + stub: """ touch raw_variants.variant_calling_detail_metrics raw_variants.variant_calling_summary_metrics @@ -354,9 +356,9 @@ process bcftools_stats { /* Quality-control step to collect summary statistics from bcftools stats. When bcftools stats is run with one VCF file then stats by non-reference - allele frequency, depth distribution, stats by quality and per-sample - counts, singleton statsistics are calculated.
Please see bcftools' - documentation for more information: + allele frequency, depth distribution, stats by quality and per-sample + counts, singleton statistics are calculated. Please see bcftools' + documentation for more information: http://samtools.github.io/bcftools/bcftools.html#stats @Input: Per sample gVCF file (scatter) @@ -364,14 +366,14 @@ process bcftools_stats { Text file containing a collection of summary statistics */ - label 'process_mid' + label 'process_medium' input: tuple val(samplename), path("${samplename}.gvcf.gz"),path("${samplename}.gvcf.gz.tbi") output: path("${samplename}.germline.bcftools_stats.txt") - - script: + + script: """ bcftools stats ${samplename}.gvcf.gz > ${samplename}.germline.bcftools_stats.txt """ @@ -385,22 +387,22 @@ process bcftools_stats { process gatk_varianteval { /* - Quality-control step to calculate various quality control metrics from a - variant callset. These metrics include the number of raw or filtered SNP + Quality-control step to calculate various quality control metrics from a + variant callset. These metrics include the number of raw or filtered SNP counts; ratio of transition mutations to transversions; concordance of a particular sample's calls to a genotyping chip; number of s per sample.
- Please see GATK's documentation for more information: + Please see GATK's documentation for more information: https://gatk.broadinstitute.org/hc/en-us/articles/360040507171-VariantEval @Input: Per sample gVCF file (scatter) @Output: Evaluation table containing a collection of summary statistics */ - label 'process_mid' + label 'process_medium' - input: + input: tuple val(samplename), path("${samplename}.gvcf.gz") ,path("${samplename}.gvcf.gz.tbi") - output: + output: path("${samplename}.germline.eval.grp") //params: // rname = "vareval", @@ -410,7 +412,7 @@ process gatk_varianteval { //message: "Running GATK4 VariantEval on '{input.vcf}' input file" //container: config['images']['wes_base'] //threads: 16 - script: + script: """ gatk --java-options '-Xmx12g -XX:ParallelGCThreads=16' VariantEval \ -R $GENOMEREF \ @@ -431,23 +433,23 @@ process snpeff { /* Data processing and quality-control step to annotate variants, predict its functional effects, and collect various summary statistics about variants and - their annotations. Please see SnpEff's documentation for more information: + their annotations. 
Please see SnpEff's documentation for more information: https://pcingola.github.io/SnpEff/ @Input: Per sample gVCF file (scatter) @Output: Evaluation table containing a collection of summary statistics */ - label 'process_mid' + label 'process_medium' - input: + input: tuple val(samplename), path("${samplename}.gvcf.gz"), path("${samplename}.gvcf.gz.tbi") - output: + output: tuple path("${samplename}.germline.snpeff.ann.vcf"), path("${samplename}.germline.snpeff.ann.csv"), path("${samplename}.germline.snpeff.ann.html") - script: + script: """ java -Xmx12g -jar \$SNPEFF_JAR \ -v -canon -c $SNPEFF_CONFIG \ @@ -478,15 +480,15 @@ process somalier_extract { input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") - output: + output: path("output/${samplename}.somalier") //params: // sites_vcf = config['references']['SOMALIER']['SITES_VCF'], // genomeFasta = config['references']['GENOME'], // rname = 'somalier_extract' //container: config['images']['wes_base'] - script: - """ + script: + """ mkdir -p output somalier extract \ -d output \ @@ -498,7 +500,7 @@ process somalier_extract { stub: """ mkdir -p output - touch output/${samplename}.somalier + touch output/${samplename}.somalier """ } @@ -518,21 +520,21 @@ process somalier_analysis_human { input: path(somalierin) - + output: tuple path("relatedness.pairs.tsv"), path("relatedness.samples.tsv"), path("ancestry.somalier-ancestry.tsv"), path("predicted.genders.tsv"), path("predicted.pairs.tsv"), path("sampleAncestryPCAPlot.html"), path("predictedPairsAncestry.pdf") - + script: - """ + """ echo "Estimating relatedness" somalier relate \ -o "relatedness" \ $somalierin - + echo "Estimating ancestry" somalier ancestry \ -o "ancestry" \ @@ -542,19 +544,19 @@ process somalier_analysis_human { Rscript $SCRIPT_PATH_GENDER \ relatedness.samples.tsv \ - predicted.genders.tsv - + predicted.genders.tsv + Rscript $SCRIPT_PATH_SAMPLES \ relatedness.pairs.tsv \ predicted.pairs.tsv - + Rscript $SCRIPT_PATH_PCA 
\ ancestry.somalier-ancestry.tsv \ predicted.pairs.tsv \ sampleAncestryPCAPlot.html \ predictedPairsAncestry.pdf """ - + stub: """ @@ -581,30 +583,30 @@ process somalier_analysis_mouse { input: path(somalierin) - + output: - tuple path("relatedness.pairs.tsv"), + tuple path("relatedness.pairs.tsv"), path("relatedness.samples.tsv"), path("predicted.genders.tsv"), path("predicted.pairs.tsv") - + script: - """ + """ echo "Estimating relatedness" somalier relate \ -o "relatedness" \ $somalierin - + Rscript $SCRIPT_PATH_GENDER \ relatedness.samples.tsv \ - predicted.genders.tsv - + predicted.genders.tsv + Rscript $SCRIPT_PATH_SAMPLES \ relatedness.pairs.tsv \ predicted.pairs.tsv - + """ - + stub: """ @@ -612,7 +614,7 @@ process somalier_analysis_mouse { touch relatedness.samples.tsv touch predicted.genders.tsv touch predicted.pairs.tsv - + """ } @@ -620,23 +622,23 @@ process multiqc { """ Reporting step to aggregate sample summary statistics and quality-control - information across all samples. This will be one of the last steps of the - pipeline. The inputs listed here are to ensure that this step runs last. - During runtime, MultiQC will recurively crawl through the working directory + information across all samples. This will be one of the last steps of the + pipeline. The inputs listed here are to ensure that this step runs last. + During runtime, MultiQC will recursively crawl through the working directory and parse files that it supports. @Input: List of files to ensure this step runs last (gather) @Output: Interactive MulitQC report and a QC metadata table """ - - input: + + input: path(allqcin) - output: + output: path("MultiQC_Report.html") - script: + script: """ multiqc . 
\ diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf index a6f58f4..dda67c7 100644 --- a/modules/local/structural_variant.nf +++ b/modules/local/structural_variant.nf @@ -1,7 +1,7 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOME=params.genome BWAGENOME=file(params.genomes[params.genome].bwagenome) -DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) +DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) @@ -10,7 +10,7 @@ process svaba_somatic { input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.bps.txt.gz"), @@ -34,7 +34,7 @@ process svaba_somatic { """ stub: - + """ touch "${tumor.simpleName}.bps.txt.gz" touch "${tumor.simpleName}.contigs.bam" @@ -61,7 +61,7 @@ process manta_somatic { input: tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.diplodSV.vcf.gz"), @@ -80,7 +80,7 @@ process manta_somatic { --runDir=wd wd/runWorkflow.py -m local -j 10 -g 10 - + mv wd/results/variants/diploidSV.vcf.gz ${tumor.simpleName}.diplodSV.vcf.gz mv wd/results/variants/somaticSV.vcf.gz ${tumor.simpleName}.somaticSV.vcf.gz mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz @@ -89,7 +89,7 @@ process manta_somatic { """ stub: - + """ touch ${tumor.simpleName}.diplodSV.vcf.gz touch ${tumor.simpleName}.somaticSV.vcf.gz @@ -140,7 +140,7 @@ process manta_tonly { input: tuple val(tumorname), path(tumor), path(tumorbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.candidateSV.vcf.gz"), @@ -158,7 +158,7 @@ process manta_tonly { --runDir=wd wd/runWorkflow.py -m local -j 10 -g 10 - + mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumor.simpleName}.candidateSmallIndels.vcf.gz mv 
wd/results/variants/tumorSV.vcf.gz ${tumor.simpleName}.tumorSV.vcf.gz @@ -166,7 +166,7 @@ process manta_tonly { """ stub: - + """ touch ${tumor.simpleName}.candidateSV.vcf.gz touch ${tumor.simpleName}.candidateSmallIndels.vcf.gz @@ -182,7 +182,7 @@ process svaba_tonly { input: tuple val(tumorname), path(tumor), path(tumorbai) - + output: tuple val(tumorname), path("${tumor.simpleName}.bps.txt.gz"), @@ -202,7 +202,7 @@ process svaba_tonly { """ stub: - + """ touch "${tumor.simpleName}.bps.txt.gz" touch "${tumor.simpleName}.contigs.bam" @@ -221,11 +221,11 @@ process svaba_tonly { process gunzip { input: - tuple val(tumorname), + tuple val(tumorname), path(vcf), val(sv) output: - tuple val(tumorname), + tuple val(tumorname), path("${tumorname}.tumorSV.vcf"), val(sv) script: @@ -246,7 +246,7 @@ process survivor_sv { module = ['survivor'] input: - tuple val(tumorname), + tuple val(tumorname), path(vcfs),val(svs) output: @@ -275,9 +275,7 @@ process survivor_sv { process annotsv_tonly { //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files //Requires bedtools,bcftools - module = ['annotsv/3.3.1'] - publishDir(path: "${outdir}/SV/annotated_tonly", mode: 'copy') input: tuple val(tumorname), path(somaticvcf), val(sv) @@ -306,4 +304,4 @@ process annotsv_tonly { touch "${sv}/${tumorname}.tsv" touch "${sv}/${tumorname}.unannotated.tsv" """ -} \ No newline at end of file +} diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 4fa34db..1ceb063 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -3,12 +3,13 @@ KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL process fastp { - label 'process_mid' + container = "${params.containers.logan}" + label 'process_medium' tag { name } input: tuple val(samplename), path(fqs) - + output: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), @@ -40,15 +41,16 @@ process fastp { process bwamem2 { + container = "${params.containers.logan}" tag { name } - + input: - 
tuple val(samplename), + tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), path("${samplename}.R2.trimmed.fastq.gz"), path("${samplename}.fastp.json"), path("${samplename}.fastp.html") - + output: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") @@ -77,9 +79,9 @@ process bwamem2 { process bqsr { /* - Base quality recalibration for all samples - */ - + Base quality recalibration for all samples + */ + container = "${params.containers.logan}" label 'process_low' input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path(bed) @@ -99,19 +101,20 @@ process bqsr { stub: """ - touch ${samplename}_${bed.simpleName}.recal_data.grp + touch ${samplename}_${bed.simpleName}.recal_data.grp """ } process gatherbqsr { + container = "${params.containers.logan}" label 'process_low' - input: + input: tuple val(samplename), path(recalgroups) output: tuple val(samplename), path("${samplename}.recal_data.grp") script: - + strin = recalgroups.join(" --input ") """ @@ -131,10 +134,11 @@ process gatherbqsr { process applybqsr { /* - Base quality recalibration for all samples to - */ + Base quality recalibration for all samples to + */ + container = "${params.containers.logan}" label 'process_low' - + input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp") @@ -154,7 +158,7 @@ process applybqsr { """ stub: - + """ touch ${samplename}.bqsr.bam ${samplename}.bqsr.bai """ @@ -164,12 +168,12 @@ process applybqsr { process samtoolsindex { - label 'process_mid' - publishDir(path: "${outdir}/bams/BQSR", mode: 'copy') - + container = "${params.containers.logan}" + label 'process_medium' + input: tuple val(bamname), path(bam) - + output: tuple val(bamname), path(bam), path("${bam}.bai") @@ -187,9 +191,10 @@ process samtoolsindex { //Save to CRAM for output process bamtocram_tonly { - label 'process_mid' - - input: + container = "${params.containers.logan}" + label 
'process_medium' + + input: tuple val(tumorname), path(tumor), path(tumorbai) output: @@ -204,27 +209,27 @@ process bamtocram_tonly { /* process indelrealign { - //Briefly, RealignerTargetCreator runs faster with increasing -nt threads, + //Briefly, RealignerTargetCreator runs faster with increasing -nt threads, //while IndelRealigner shows diminishing returns for increasing scatter - + tag { name } - + input: tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") output: tuple val(samplename), path("${samplename}.ir.bam") - script: - + script: + """ /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \ -I ${samplename}.bam \ -R ${GENOMEREF} \ -o ${samplename}.intervals \ -nt 16 \ - -known ${MILLSINDEL} -known ${SHAPEITINDEL} - + -known ${MILLSINDEL} -known ${SHAPEITINDEL} + /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \ -R ${GENOMEREF} \ -I ${samplename}.bam \ @@ -234,11 +239,11 @@ process indelrealign { -targetIntervals ${samplename}.intervals \ -o ${samplename}.ir.bam """ - + stub: """ - touch ${samplename}.ir.bam + touch ${samplename}.ir.bam """ } diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index e23cd0e..ef25413 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -1,10 +1,10 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) -KGPGERMLINE=params.genomes[params.genome].kgp -DBSNP=file(params.genomes[params.genome].dbsnp) -GNOMADGERMLINE=params.genomes[params.genome].gnomad -PON=file(params.genomes[params.genome].pon) +KGPGERMLINE=params.genomes[params.genome].kgp +DBSNP=file(params.genomes[params.genome].dbsnp) +GNOMADGERMLINE=params.genomes[params.genome].gnomad +PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies 
VEPBUILD=params.genomes[params.genome].vepbuild @@ -15,20 +15,21 @@ LOFREQ_CONVERT=params.lofreq_convert process mutect2 { + container "${params.containers.logan}" label 'process_somaticcaller' input: tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai), + val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats") - + script: """ gatk Mutect2 \ @@ -55,12 +56,13 @@ process mutect2 { process pileup_paired_t { + container "${params.containers.logan}" label 'process_highmem' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") @@ -71,7 +73,7 @@ process pileup_paired_t { -I ${tumor} \ -V $KGPGERMLINE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table """ @@ -84,12 +86,13 @@ process pileup_paired_t { process pileup_paired_n { + container "${params.containers.logan}" label 'process_highmem' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.normal.pileup.table") @@ -100,10 +103,10 @@ process pileup_paired_n { -I ${normal} \ -V $KGPGERMLINE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table + -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table """ - + stub: """ touch ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table @@ -113,13 +116,14 @@ process pileup_paired_n { process contamination_paired { + 
container "${params.containers.logan}" label 'process_highmem' input: tuple val(tumorname), path(tumor_pileups), path(normal_pileups) - + output: tuple val(tumorname), path("${tumorname}_allpileups.table"), @@ -137,7 +141,7 @@ process contamination_paired { gatk GatherPileupSummaries \ --sequence-dictionary $GENOMEDICT \ -I ${alltumor} -O ${tumorname}_allpileups.table - + gatk GatherPileupSummaries \ --sequence-dictionary $GENOMEDICT \ -I ${allnormal} -O ${tumorname}_normal.allpileups.table @@ -161,20 +165,21 @@ process contamination_paired { touch ${tumorname}_normal.contamination.table """ - + } process learnreadorientationmodel { + container "${params.containers.logan}" label 'process_highmem' input: tuple val(sample), path(f1r2) - + output: tuple val(sample), path("${sample}.read-orientation-model.tar.gz") - script: + script: f1r2in = f1r2.join(" --input ") """ @@ -191,15 +196,16 @@ process learnreadorientationmodel { process mergemut2stats { + container "${params.containers.logan}" label 'process_low' input: tuple val(sample), path(stats) - + output: tuple val(sample), path("${sample}.final.stats") - script: + script: statsin = stats.join(" --stats ") """ @@ -217,17 +223,18 @@ process mergemut2stats { process mutect2filter { - label 'process_mid' - + container "${params.containers.logan}" + label 'process_medium' + input: - tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), + tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) - + output: - tuple val("${tumor}_vs_${normal}"), - path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), + tuple val("${tumor}_vs_${normal}"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"), - path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), 
path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv") script: @@ -247,7 +254,7 @@ process mutect2filter { --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \ --exclude-filtered \ --output ${tumor}_vs_${normal}.mut2.final.vcf.gz - + bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ @@ -267,18 +274,19 @@ process mutect2filter { process strelka_tn { + container "${params.containers.logan}" label 'process_highcpu' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz"), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") - + script: """ @@ -296,21 +304,21 @@ process strelka_tn { ./wd/runWorkflow.py -m local -j $task.cpus mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz - - printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname - + + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ - | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz + | bcftools view -Oz -o 
${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ - | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz - + bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + """ stub: - + """ touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi @@ -321,16 +329,17 @@ process strelka_tn { process vardict_tn { - label 'process_highcpu' + container "${params.containers.logan}" + label 'process_somaticcaller_high' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz") - //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and - //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” + //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and + //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” script: """ @@ -350,8 +359,8 @@ process vardict_tn { -S \ -f 0.05 > 
${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf - printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname - + printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \ | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz @@ -359,7 +368,7 @@ process vardict_tn { """ stub: - + """ touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz @@ -370,19 +379,20 @@ process vardict_tn { process varscan_tn { + container "${params.containers.logan}" label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed), val(tumor1), - path(tumorpileup), path(normalpileup), + path(tumorpileup), path(normalpileup), path(tumor_con_table), path(normal_con_table) - + output: tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz") - + shell: ''' tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l) @@ -402,8 +412,8 @@ process varscan_tn { -R !{GENOMEREF} -SD !{GENOMEDICT} \ -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf - printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname - + printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname + bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \ | bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz @@ -418,17 +428,18 @@ process varscan_tn { process octopus_tn { - //label 'process_highcpu' Using separate docker for octopus + container "${params.containers.octopus}" + label 'process_somaticcaller_high' 
input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: tuple val("${tumorname}_vs_${normalname}"), path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") - + script: """ @@ -443,32 +454,33 @@ process octopus_tn { """ stub: - + """ touch "${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz" """ -} +} process lofreq_tn { - label 'process_somaticcaller' + container "${params.containers.logan}" + label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) - + output: - - tuple val(tumorname), val(normalname), + + tuple val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi") - + script: """ @@ -478,16 +490,16 @@ process lofreq_tn { -l ${bed} \ --call-indels \ -o ${tumorname}_vs_${normalname}_${bed.simpleName}_ - + bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz $LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \ -n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz - + bcftools view -h 
${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead - + sed 's/^##FORMAT=/##FORMAT=/' temphead > temphead1 bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\ bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz @@ -497,71 +509,73 @@ process lofreq_tn { """ stub: - + """ touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi" - + """ -} +} process muse_tn { - label 'process_somaticcaller' + container "${params.containers.logan}" + label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) - + output: tuple val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}.vcf.gz") - + script: """ MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \ -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G - + bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz - printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname - + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \ | bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz """ stub: - + """ touch "${tumorname}_vs_${normalname}.vcf.gz" """ -} +} process combineVariants { + container "${params.containers.logan}" 
label 'process_highmem' input: tuple val(sample), path(inputvcf), val(vc) - + output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), - path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + tuple val(sample), + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), path("${vc}/${sample}.${vc}.norm.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") script: vcfin = inputvcf.join(" -I ") - + """ mkdir ${vc} gatk --java-options "-Xmx48g" SortVcf \ @@ -596,21 +610,22 @@ process combineVariants { process combineVariants_alternative { + container "${params.containers.logan}" label 'process_highmem' input: tuple val(sample), path(vcfs), path(vcfsindex), val(vc) - + output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), - path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), + tuple val(sample), + path("${vc}/${sample}.${vc}.marked.vcf.gz"), + path("${vc}/${sample}.${vc}.marked.vcf.gz.tbi"), path("${vc}/${sample}.${vc}.norm.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz.tbi") - + script: vcfin = vcfs.join(" ") - + """ mkdir ${vc} bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz @@ -636,13 +651,14 @@ process combineVariants_alternative { touch ${vc}/${sample}.${vc}.norm.vcf.gz touch ${vc}/${sample}.${vc}.marked.vcf.gz.tbi touch ${vc}/${sample}.${vc}.norm.vcf.gz.tbi - + """ } process bcftools_index_octopus { + container "${params.containers.logan}" label 'process_low' input: @@ -651,10 +667,10 @@ process bcftools_index_octopus { output: tuple val(tumor), - path(vcf), + path(vcf), path("${vcf}.tbi") - - script: + + script: """ bcftools index -t ${vcf} """ @@ -670,32 +686,33 @@ process bcftools_index_octopus { process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels - label 'process_mid' + container "${params.containers.logan}" + label 'process_medium' input: - tuple val(sample), + tuple val(sample), path(strelkasnvs), 
path(snvindex), path(strelkaindels), path(indelindex) - + output: - tuple val(sample), + tuple val(sample), path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"), path("${sample}.filtered.strelka.vcf.gz"), path("${sample}.filtered.strelka.vcf.gz.tbi") - - + + script: - + vcfin = strelkasnvs.join(" ") indelsin = strelkaindels.join(" ") """ - bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a + bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz - bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz + bcftools sort ${sample}.temp1.strelka.vcf.gz -Oz -o ${sample}.strelka.vcf.gz bcftools view ${sample}.strelka.vcf.gz --threads $task.cpus -f PASS -Oz -o ${sample}.filtered.strelka.vcf.gz @@ -708,16 +725,17 @@ process combineVariants_strelka { """ touch ${sample}.strelka.vcf.gz ${sample}.strelka.vcf.gz.tbi touch ${sample}.filtered.strelka.vcf.gz ${sample}.filtered.strelka.vcf.gz.tbi - + """ } process somaticcombine { - label 'process_mid' + container "${params.containers.logan}" + label 'process_medium' - input: + input: tuple val(tumorsample), val(normal), val(callers), path(vcfs), path(vcfindex) @@ -730,7 +748,7 @@ process somaticcombine { script: vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } vcfin2="-V:" + vcfin1.join(" -V:") - + """ java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ -R $GENOMEREF \ @@ -753,10 +771,13 @@ process somaticcombine { } -process annotvep_tn { +process annotvep_tn { + label 'process_medium' + container "${params.containers.vcf2maf}" + input: - tuple val(tumorsample), val(normalsample), - val(vc), path(tumorvcf), path(vcfindex) + tuple val(tumorsample), val(normalsample), + 
val(vc), path(tumorvcf), path(vcfindex) output: path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf") @@ -771,15 +792,15 @@ process annotvep_tn { NORM_VCF_ID_ARG="" NSAMPLES=${#VCF_SAMPLE_IDS[@]} if [ $NSAMPLES -gt 1 ]; then - # Assign tumor, normal IDs - # Look through column names and + # Assign tumor, normal IDs + # Look through column names and # see if they match provided IDs for (( i = 0; i < $NSAMPLES; i++ )); do echo "${VCF_SAMPLE_IDS[$i]}" if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then TID_IDX=$i fi - + if [ "${VCF_SAMPLE_IDS[$i]}" == !{normalsample} ]; then NID_IDX=$i fi @@ -791,9 +812,9 @@ process annotvep_tn { fi fi VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} - + zcat !{tumorvcf} > !{tumorvcf.baseName} - + mkdir -p paired/!{vc} vcf2maf.pl \ @@ -817,10 +838,10 @@ process annotvep_tn { process combinemafs_tn { + container "${params.containers.logan}" label 'process_low' - publishDir(path: "${outdir}/mafs/paired", mode: 'copy') - input: + input: path(allmafs) output: @@ -840,5 +861,3 @@ process combinemafs_tn { touch final_tn.maf """ } - - diff --git a/modules/local/variant_calling_tonly.nf b/modules/local/variant_calling_tonly.nf index 9f8bf93..c063f2a 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -1,10 +1,10 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) -KGPGERMLINE=params.genomes[params.genome].kgp -DBSNP=file(params.genomes[params.genome].dbsnp) -GNOMADGERMLINE=params.genomes[params.genome].gnomad -PON=file(params.genomes[params.genome].pon) +KGPGERMLINE=params.genomes[params.genome].kgp +DBSNP=file(params.genomes[params.genome].dbsnp) +GNOMADGERMLINE=params.genomes[params.genome].gnomad +PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies 
VEPBUILD=params.genomes[params.genome].vepbuild @@ -13,11 +13,13 @@ GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest process pileup_paired_tonly { + container "${params.containers.logan}" + label 'process_highmem' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") @@ -29,7 +31,7 @@ process pileup_paired_tonly { -I ${tumor} \ -V $KGPGERMLINE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table """ @@ -43,6 +45,8 @@ process pileup_paired_tonly { process contamination_tumoronly { + container "${params.containers.logan}" + label 'process_highmem' input: @@ -63,7 +67,7 @@ process contamination_tumoronly { gatk GatherPileupSummaries \ --sequence-dictionary $GENOMEDICT \ -I ${alltumor} -O ${tumorname}_allpileups.table - + gatk CalculateContamination \ -I ${tumorname}_allpileups.table \ -O ${tumorname}.contamination.table @@ -81,15 +85,17 @@ process contamination_tumoronly { process learnreadorientationmodel_tonly { + container "${params.containers.logan}" + label 'process_highmem' input: tuple val(sample), path(f1r2) - + output: tuple val(sample), path("${sample}.read-orientation-model.tar.gz") - script: + script: f1r2in = f1r2.join(" --input ") """ @@ -109,15 +115,17 @@ process learnreadorientationmodel_tonly { process mergemut2stats_tonly { + container "${params.containers.logan}" + label 'process_low' input: tuple val(sample), path(stats) - + output: tuple val(sample), path("${sample}.final.stats") - script: + script: statsin = stats.join(" --stats ") """ @@ -136,16 +144,18 @@ process mergemut2stats_tonly { process mutect2_t_tonly { + container "${params.containers.logan}" label 'process_somaticcaller' + input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), 
path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz"), path("${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz"), path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz.stats") - + script: """ @@ -158,7 +168,7 @@ process mutect2_t_tonly { --panel-of-normals $PON \ --output ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz \ --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \ - --independent-mates + --independent-mates """ stub: @@ -174,14 +184,15 @@ process mutect2_t_tonly { process mutect2filter_tonly { - label 'process_mid' + container "${params.containers.logan}" + label 'process_medium' input: tuple val(sample), path(mutvcfs), path(stats), path(obs), path(pileups),path(tumorcontamination) output: - tuple val(sample), - path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), - path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), + tuple val(sample), + path("${sample}.tonly.mut2.marked.vcf.gz"),path("${sample}.tonly.mut2.marked.vcf.gz.tbi"), + path("${sample}.tonly.mut2.norm.vcf.gz"),path("${sample}.tonly.mut2.norm.vcf.gz.tbi"), path("${sample}.tonly.mut2.marked.vcf.gz.filteringStats.tsv") script: @@ -190,8 +201,8 @@ process mutect2filter_tonly { """ - gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz - gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz + gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz + gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz gatk FilterMutectCalls \ -R $GENOMEREF \ -V ${sample}.tonly.concat.vcf.gz \ @@ -207,7 +218,7 @@ process mutect2filter_tonly { --output ${sample}.tonly.mut2.final.vcf.gz bcftools sort ${sample}.tonly.mut2.final.vcf.gz |\ - bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + bcftools norm --threads ${task.cpus} --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' |\ bcftools view - -Oz 
-o ${sample}.tonly.mut2.norm.vcf.gz @@ -225,16 +236,18 @@ process mutect2filter_tonly { process varscan_tonly { + container "${params.containers.logan}" + label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai), + tuple val(tumorname), path(tumor), path(tumorbai), path(bed), path(tumorpileup), path(tumor_con_table) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz") - + shell: ''' @@ -247,8 +260,8 @@ process varscan_tonly { awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf_temp \ | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf - printf "TUMOR\t!{tumorname}\n" > sampname - + printf "TUMOR\t!{tumorname}\n" > sampname + bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \ | bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz @@ -263,14 +276,16 @@ process varscan_tonly { process vardict_tonly { - label 'process_highcpu' + container "${params.containers.logan}" + label 'process_somaticcaller_high' + input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz") - + script: """ @@ -281,7 +296,7 @@ process vardict_tonly { -x 500 \ --nosv \ -b ${tumor} --fisher \ - -t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \ + -t -Q 20 -c 1 -S 2 -E 3 --th ${task.cpus} \ temp_${bed} | var2vcf_valid.pl \ -N ${tumor} \ -Q 20 \ @@ -291,15 +306,15 @@ process vardict_tonly { -E \ -f 0.05 > ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf - printf "${tumor.Name}\t${tumorname}\n" > sampname - + printf "${tumor.Name}\t${tumorname}\n" > sampname + bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \ | bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz """ stub: 
- + """ touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz @@ -309,30 +324,27 @@ process vardict_tonly { process octopus_tonly { - //label 'process_highcpu' + container "${params.containers.octopus}" + label 'process_somaticcaller_high' input: tuple val(tumorname), path(tumor), path(tumorbai), path(bed) - + output: tuple val(tumorname), path("${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz") - - script: + script: """ octopus -R $GENOMEREF -C cancer -I ${tumor} \ --annotations AC AD DP \ --target-working-memory 64Gb \ -t ${bed} \ $SOMATIC_FOREST \ - -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus - - + -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads ${task.cpus} """ stub: - """ touch ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz """ @@ -341,11 +353,11 @@ process octopus_tonly { process somaticcombine_tonly { - label 'process_mid' - publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy') + container "${params.containers.logan}" + label 'process_medium' - input: - tuple val(tumorsample), + input: + tuple val(tumorsample), val(callers), path(vcfs), path(vcfindex) @@ -376,11 +388,12 @@ process somaticcombine_tonly { } process annotvep_tonly { - publishDir("${outdir}/mafs", mode: "copy") + container "${params.containers.vcf2maf}" + label 'process_medium' input: - tuple val(tumorsample), - val(vc), path(tumorvcf), + tuple val(tumorsample), + val(vc), path(tumorvcf), path(vcfindex) @@ -397,15 +410,15 @@ process annotvep_tonly { NORM_VCF_ID_ARG="" NSAMPLES=${#VCF_SAMPLE_IDS[@]} if [ $NSAMPLES -gt 1 ]; then - # Assign tumor, normal IDs - # Look through column names and + # Assign tumor, normal IDs + # Look through column names and # see if they match provided IDs for (( i = 0; i < $NSAMPLES; i++ )); do echo "${VCF_SAMPLE_IDS[$i]}" if [ "${VCF_SAMPLE_IDS[$i]}" == !{tumorsample} ]; then TID_IDX=$i fi - + done if [ ! 
-z $NID_IDX ]; then @@ -414,9 +427,9 @@ process annotvep_tonly { fi fi VCF_TID=${VCF_SAMPLE_IDS[$TID_IDX]} - + zcat !{tumorvcf} > !{tumorvcf.baseName} - + mkdir -p tumor_only/!{vc} vcf2maf.pl \ @@ -439,9 +452,10 @@ process annotvep_tonly { } process combinemafs_tonly { + container "${params.containers.logan}" label 'process_low' - input: + input: path(allmafs) output: @@ -449,7 +463,7 @@ process combinemafs_tonly { shell: mafin= allmafs.join(" ") - + """ echo "Combining MAFs..." head -2 ${allmafs[0]} > final_tonly.maf @@ -461,6 +475,3 @@ process combinemafs_tonly { touch final_tonly.maf """ } - - - diff --git a/nextflow.config b/nextflow.config index 70fd5a7..6f66adb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,13 +7,14 @@ manifest { mainScript = "main.nf" } + includeConfig 'conf/containers.config' includeConfig 'conf/genomes.config' includeConfig 'conf/base.config' includeConfig 'conf/modules.config' -params { +params { fastq_screen_conf = "${projectDir}/conf/fastq_screen.conf" get_flowcell_lanes = "${projectDir}/bin/scripts/flowcell_lane.py" @@ -23,11 +24,11 @@ params { script_ancestry = "${projectDir}/bin/sampleCompareAncestoryPlots.R" script_sequenza = "${projectDir}/bin/run_sequenza.R" script_freec = "${projectDir}/bin/make_freec_genome.pl" - script_freecpaired = "${projectDir}/bin/freec_paired.pl" + script_freecpaired = "${projectDir}/bin/make_freec_genome_paired.pl" freec_significance = "${projectDir}/bin/assess_significance.R" freec_plot = "${projectDir}/bin/makeGraph.R" lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh" - split_regions = "24" //Number of regions to split by + split_regions = "24" //Number of regions to split by vep_cache = "/fdb/VEP/102/cache" @@ -40,10 +41,15 @@ params { bam=null //Set all Inputs to null sample_sheet=null + + fastq_file_input=null + bam_file_input=null + file_input=null + fastq_input=null bam_input=null + BAMINPUT=null - file_input=null publish_dir_mode = 'symlink' outdir = 'results' @@ -67,25 +73,25 @@ 
profiles { autoMounts = true cacheDir = "$PWD/singularity" envWhitelist='https_proxy,http_proxy,ftp_proxy,DISPLAY,SLURM_JOBID' - runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/nousomedr/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' - } - biowulf { - includeConfig 'conf/biowulf.config' + runOptions = '-B /gs10,/gs11,/gs12,/gs9,/spin1,/data/CCBR_Pipeliner/,/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/,/data/CCBR/projects/,/vf/users,/gpfs,/fdb' + } + biowulf { + includeConfig 'conf/biowulf.config' } - frce { - includeConfig 'conf/frce.config' + frce { + includeConfig 'conf/frce.config' } - interactive { - includeConfig 'conf/interactive.config' + interactive { + includeConfig 'conf/interactive.config' } - slurm { - includeConfig 'conf/slurm.config' + slurm { + includeConfig 'conf/slurm.config' } - ci_stub { - includeConfig 'conf/ci_stub.config' + ci_stub { + includeConfig 'conf/ci_stub.config' } } - + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
@@ -99,7 +105,7 @@ profiles { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] - + diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 3e27ff2..766a41a 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -71,8 +71,8 @@ workflow INPUT { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) - }else if(params.file_input) { - fastqinput=Channel.fromPath(params.file_input) + }else if(params.fastq_file_input) { + fastqinput=Channel.fromPath(params.fastq_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,fq1,fq2 -> tuple(sample, tuple(file(fq1),file(fq2))) @@ -574,8 +574,8 @@ workflow INPUT_BAM { println "Missing BAM Index" } - }else if(params.file_input) { - baminputonly=Channel.fromPath(params.file_input) + }else if(params.bam_file_input) { + baminputonly=Channel.fromPath(params.bam_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,bam,bai -> tuple(sample, file(bam),file(bai)) diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index c73803a..9f19e1a 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -51,8 +51,8 @@ workflow INPUT_TONLY { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) - }else if(params.file_input) { - fastqinput=Channel.fromPath(params.file_input) + }else if(params.fastq_file_input) { + fastqinput=Channel.fromPath(params.fastq_file_input) .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,fq1,fq2 -> tuple(sample, tuple(file(fq1),file(fq2))) @@ -346,8 +346,8 @@ workflow INPUT_TONLY_BAM { sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( samplename)} - }else if(params.file_input) { - baminputonly=Channel.fromPath(params.file_input) + }else if(params.bam_file_input) { + baminputonly=Channel.fromPath(params.bam_file_input) 
.splitCsv(header: false, sep: "\t", strip:true) .map{ sample,bam,bai -> tuple(sample, file(bam),file(bai))