diff --git a/bin/convertStrelka.py b/bin/convertStrelka.py new file mode 100755 index 0000000..8550db1 --- /dev/null +++ b/bin/convertStrelka.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +import os +import numpy as np +import vcfpy +import sys + +def _tumor_normal_genotypes(ref, alt, info): + """Retrieve standard 0/0, 0/1, 1/1 style genotypes from INFO field. + + Normal -- NT field (ref, het, hom, conflict) + Tumor -- SGT field + - for SNPs specified as GG->TT for the normal and tumor diploid alleles. These + can also represent more complex alleles in which case we set at heterozygotes + pending longer term inclusion of genotypes in Strelka2 directly + (https://github.com/Illumina/strelka/issues/16) + - For indels, uses the ref, het, hom convention + + ref: The REF allele from a VCF line + alt: A list of potentially multiple ALT alleles (rec.ALT.split(";")) + info: The VCF INFO field + fname, coords: not currently used, for debugging purposes + """ + known_names = set(["het", "hom", "ref", "conflict"]) + def name_to_gt(val): + if val.lower() == "het": + return "0/1" + elif val.lower() == "hom": + return "1/1" + elif val.lower() in set(["ref", "conflict"]): + return "0/0" + else: + # Non-standard representations, het is our best imperfect representation + # print(fname, coords, ref, alt, info, val) + return "0/1" + def alleles_to_gt(val): + gt_indices = {gt.upper(): i for i, gt in enumerate([ref] + [alt])} + tumor_gts = [gt_indices[x.upper()] for x in val if x in gt_indices] + if tumor_gts and val not in known_names: + if max(tumor_gts) == 0: + tumor_gt = "0/0" + elif 0 in tumor_gts: + tumor_gt = "0/%s" % min([x for x in tumor_gts if x > 0]) + else: + tumor_gt = "%s/%s" % (min(tumor_gts), max(tumor_gts)) + else: + tumor_gt = name_to_gt(val) + return tumor_gt + nt_val = info.get('NT').split("=")[-1] + normal_gt = name_to_gt(nt_val) + sgt_val = info.get('SGT').split("=")[-1] + if not sgt_val: + tumor_gt = "0/0" + else: + sgt_val = sgt_val.split("->")[-1] + tumor_gt = alleles_to_gt(sgt_val) + return normal_gt, tumor_gt + + +def _af_annotate_and_filter(in_file,out_file): + """Populating FORMAT/AF, and dropping variants with AFU/DP (somatic snps), TIR/DPI (somatic indels)'), + ('Type','Float'), + ('Number', '.') + ])) + vcf.header.add_format_line(vcfpy.OrderedDict([ + ('ID', 'GT'), + ('Description', 'Genotype'), + ('Type','String'), + ('Number', '1') + ])) + writer = vcfpy.Writer.from_path(out_file, vcf.header) + for rec in vcf: + #print(rec) + if rec.is_snv(): # snps? 
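[annotation] The helper above maps Strelka's NT and SGT INFO annotations onto conventional GT strings. A minimal, self-contained sketch of that mapping, with invented example values (the simplified alleles_to_gt below is an illustration of the biallelic case, not the full function):

    # Illustrative sketch of the NT/SGT -> GT conversion implemented by
    # _tumor_normal_genotypes() above; the example values are made up.
    def name_to_gt(val):
        """Map Strelka's ref/het/hom/conflict keywords onto diploid GT strings."""
        val = val.lower()
        if val == "het":
            return "0/1"
        if val == "hom":
            return "1/1"
        if val in ("ref", "conflict"):
            return "0/0"
        return "0/1"  # non-standard representations fall back to het, as above

    def alleles_to_gt(ref, alt, alleles):
        """Translate a biallelic SGT pair such as 'GT' (REF=G, ALT=T) into 0/1 style."""
        index = {a.upper(): i for i, a in enumerate([ref, alt])}
        calls = sorted(index[a] for a in alleles.upper() if a in index)
        return "%s/%s" % (calls[0], calls[-1]) if calls else "0/0"

    # NT=ref, SGT=GG->GT for a G>T somatic SNV:
    print(name_to_gt("ref"))                                  # normal -> 0/0
    print(alleles_to_gt("G", "T", "GG->GT".split("->")[-1]))  # tumor  -> 0/1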
+ alt_counts_n = rec.calls[0].data[rec.ALT[0].value + "U"] # {ALT}U=tier1_depth,tier2_depth + alt_counts_t = rec.calls[1].data[rec.ALT[0].value + "U"] # {ALT}U=tier1_depth,tier2_depth + else: # indels + alt_counts_n = rec.calls[0].data['TIR'] # TIR=tier1_depth,tier2_depth + alt_counts_t = rec.calls[1].data['TIR'] + DP_n=rec.calls[0].data["DP"] + DP_t=rec.calls[1].data["DP"] + if DP_n is not None and DP_t is not None: + with np.errstate(divide='ignore', invalid='ignore'): # ignore division by zero and put AF=.0 + #alt_n = alt_counts_n[0]/DP_n + #alt_t = alt_counts_t[0]/DP_t + af_n = np.true_divide(alt_counts_n[0], DP_n) + af_t = np.true_divide(alt_counts_t[0], DP_t) + rec.add_format('AF',0) + rec.calls[0].data["AF"]= [round(af_n,5)] + rec.calls[1].data["AF"]= [round(af_t,5)] + normal_gt, tumor_gt= _tumor_normal_genotypes(rec.REF,rec.ALT[0].value,rec.INFO) + rec.add_format('GT',"1/0") + rec.calls[0].data["GT"]=normal_gt + rec.calls[1].data["GT"]=tumor_gt + writer.write_record(rec) + +if __name__ == '__main__': + filename = sys.argv[1] + outname = sys.argv[2] + _af_annotate_and_filter(filename, outname) + diff --git a/bin/run_sequenza.R b/bin/run_sequenza.R index 3758b8a..d767c20 100644 --- a/bin/run_sequenza.R +++ b/bin/run_sequenza.R @@ -48,15 +48,16 @@ CP.example <- sequenza.fit(seqzdata, mc.cores = n_cores) ## Sequenza.extract seems to fail if too few mutations num_mutations <- unlist(lapply(seqzdata$mutations, nrow)) -chrom_list <- names(num_mutations)[num_mutations > 3] -## But it might actually be segments, idk? -#num_segments <- unlist(lapply(seqzdata$segments, nrow)) -#chrom_list <- names(num_mutations)[num_segments > 1] +chrom_list1 <- names(num_mutations)[num_mutations > 3] +## Also fails if segments <2 +num_segments <- unlist(lapply(seqzdata$segments, nrow)) +chrom_list2 <- names(num_mutations)[num_segments > 1] +chrom_list <- intersect(chrom_list1,chrom_list2) not_included <- setdiff(names(num_mutations), chrom_list) print("Printing results...") if (length(not_included) > 0) { - print("Excluding these chromosomes because of too few mutations...") + print("Excluding these chromosomes because of too few mutations and/or segments...") print(not_included) } sequenza.results(sequenza.extract = seqzdata,cp.table = CP.example, sample.id = sampleid, out.dir=out_dir, chromosome.list=chrom_list) diff --git a/conf/base.config b/conf/base.config index 006a35b..c538c5c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -31,24 +31,24 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_low { - cpus = { check_max( 4 * task.attempt, 'cpus' ) } + cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 16 * task.attempt, 'cpus' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 32 * task.attempt, 'cpus' ) } - memory = { check_max( 120.GB * task.attempt, 'memory' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 72.h * task.attempt, 'time' ) } + time = { check_max( 120.h * task.attempt, 
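[annotation] For reference, the AF values written above come from tier-1 supporting reads ({ALT}U for SNVs, TIR for indels) divided by that sample's FORMAT/DP. A small sketch of the arithmetic, with invented counts:

    # Sketch of the AF calculation performed above; the counts are invented.
    import numpy as np

    def tier1_af(alt_counts, depth):
        """alt_counts is a (tier1, tier2) pair; AF = tier1 reads / FORMAT depth."""
        with np.errstate(divide="ignore", invalid="ignore"):  # tolerate DP == 0
            af = np.true_divide(alt_counts[0], depth)
        return round(float(af), 5)

    print(tier1_af((12, 14), 60))  # SNV, TU=(12,14), DP=60   -> 0.2
    print(tier1_af((4, 5), 37))    # indel, TIR=(4,5), DP=37  -> 0.10811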
'time' ) } } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } @@ -56,12 +56,12 @@ process { withLabel:process_somaticcaller { cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 64.GB * task.attempt, 'memory' ) } - time = { check_max( 72.h * task.attempt, 'time' ) } + time = { check_max( 120.h * task.attempt, 'time' ) } } withLabel:process_somaticcaller_high { cpus = { check_max( 18 * task.attempt, 'cpus' ) } memory = { check_max( 96.GB * task.attempt, 'memory' ) } - time = { check_max( 72.h * task.attempt, 'time' ) } + time = { check_max( 120.h * task.attempt, 'time' ) } } withLabel:process_highmem { cpus = { check_max( 4 * task.attempt, 'cpus' ) } diff --git a/conf/containers.config b/conf/containers.config index 385c9c2..c5ad361 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -6,6 +6,6 @@ params { vcf2maf = 'docker://dnousome/ccbr_vcf2maf:v102.0.0' lofreq = 'docker://dnousome/ccbr_lofreq:v0.0.1' octopus = 'docker://dancooke/octopus:latest' - annotcnvsv = 'docker://dnousome/ccbr_annotate_cnvsv:latest:v0.0.1' + annotcnvsv = 'docker://dnousome/ccbr_annotate_cnvsv:v0.0.1' } } diff --git a/conf/genomes.config b/conf/genomes.config index 3c864de..07a05d0 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -7,13 +7,14 @@ params { genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" + fullinterval = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/genomes/hg38_main.bed" INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" - gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz + gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} - kgp = 
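[annotation] The resource labels above follow the usual nf-core pattern of scaling the base request by task.attempt and capping it with check_max. A rough Python sketch of how a process_medium task would scale across retries, assuming check_max simply caps at configured maxima (the MAX_* values here are placeholders, not the pipeline's defaults):

    # Rough illustration of the cpus/memory/time retry scaling configured above.
    # check_max() is assumed to cap at params.max_cpus/max_memory/max_time.
    MAX_CPUS, MAX_MEMORY_GB, MAX_TIME_H = 16, 128, 120  # placeholder caps

    def check_max(value, cap):
        return min(value, cap)

    for attempt in (1, 2, 3):                          # task.attempt per retry
        cpus = check_max(6 * attempt, MAX_CPUS)        # process_medium base: 6 cpus
        mem  = check_max(36 * attempt, MAX_MEMORY_GB)  # base: 36.GB
        hrs  = check_max(8 * attempt, MAX_TIME_H)      # base: 8.h
        print(f"attempt {attempt}: {cpus} cpus, {mem} GB, {hrs} h")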
"/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz" + germline_resource = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz" KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh38.86" snpeff_config = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/snpEff/4.3t/snpEff.config" @@ -30,7 +31,7 @@ params { chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] //HMFTOOLS GENOMEVER = "38" - HOTSPOTS = "-hotspots /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz" + HOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.somatic.38.vcf.gz" PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz" HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz" ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data" @@ -45,15 +46,15 @@ params { genome = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa" genomefai = "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.fa.fai" bwagenome= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa" - genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.dict" + genomedict= "/data/CCBR_Pipeliner/db/PipeDB/lib/hg19.with_extra.dict" intervals= "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/hg19_noblacklist_maincontig.bed" - INDELREF = "/fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" - KNOWNINDELS = "-known /fdb/GATK_resource_bundle/b37/Mills_and_1000G_gold_standard.indels.b37.vcf -known /fdb/GATK_resource_bundle/b37/1000G_phase1.indels.b37.vcf" - KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' + INDELREF = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz" + KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz -known /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/1000G_phase1.indels.hg19.vcf.gz" + KNOWNRECAL = '--known-sites /fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.excluding_sites_after_129.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/Mills_and_1000G_gold_standard.indels.hg19.vcf.gz' dbsnp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz" - gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // 
/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz - pon = "/data/CCBR_Pipeliner/db/PipeDB/lib/GRCh37.noCOSMIC_ClinVar.pon.vcf.gz" - kgp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz" + germline_resource = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz" + gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz' + pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh37.75" snpeff_config = "/usr/local/apps/snpEff/4.3t/snpEff.config" @@ -70,7 +71,7 @@ params { chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] //HMFTOOLS GENOMEVER = "37" - HOTSPOTS = "-hotspots /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.38.vcf.gz" + HOTSPOTS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/KnownHotspots.38.vcf.gz" PANELBED = "-panel_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/ActionableCodingPanel.38.bed.gz" HCBED = "-high_confidence_bed /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/variants/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed.gz" ENSEMBLCACHE = "-ensembl_data_dir /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/hmftools/v5_34/ref/38/common/ensembl_data" @@ -80,8 +81,6 @@ params { DIPLODREG = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DiploidRegions.38.bed.gz' ENSEMBLCACHE = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/ensembl_data/' DRIVERS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/DriverGenePanel.38.tsv' - HOTSPOTS = '/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PURPLE/KnownHotspots.somatic.38.vcf.gz' - } 'mm10' { @@ -95,9 +94,9 @@ params { KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz" dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz" pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz" - kgp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_knownSNPs_sites.vcf.gz" - KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" + germline_resource = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_knownSNPs_sites.vcf.gz" gnomad= "--germline-resource /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz" + KRAKENBACDB = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCm38.86" snpeff_config = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/snpEff.config" snpeff_bundle = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/snpEff/4.3t/" diff --git a/docker/annotate_cnvsv/Dockerfile 
b/docker/annotate_cnvsv/Dockerfile index 2955075..308ad14 100644 --- a/docker/annotate_cnvsv/Dockerfile +++ b/docker/annotate_cnvsv/Dockerfile @@ -12,13 +12,18 @@ LABEL maintainer # Create Container filesystem specific # working directory and opt directories +RUN apt-get update \ + && apt-get -y upgrade \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + tclsh + WORKDIR /opt2 ###Create AnnotSV -RUN wget https://github.com/lgmgeo/AnnotSV/archive/refs/tags/v3.3.6.tar.gz \ - && tar -xvzf /opt2/v3.3.6.tar.gz \ - && rm /opt2/v3.3.6.tar.gz -ENV PATH="/opt2/AnnotSV-3.3.6/bin:$PATH" +RUN wget https://github.com/lgmgeo/AnnotSV/archive/refs/tags/v3.4.2.tar.gz \ + && tar -xvzf /opt2/v3.4.2.tar.gz \ + && rm /opt2/v3.4.2.tar.gz +ENV PATH="/opt2/AnnotSV-3.4.2/bin:$PATH" ##ClassifyCNV ##Update the resources for ClassifyCNV diff --git a/docker/annotate_cnvsv/meta.yml b/docker/annotate_cnvsv/meta.yml index 5c62fda..aa0a99c 100644 --- a/docker/annotate_cnvsv/meta.yml +++ b/docker/annotate_cnvsv/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: dnousome image_name: ccbr_annotate_cnvsv -version: v0.0.1 +version: v0.0.2 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 2afd70f..e80ed91 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -86,7 +86,7 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD5 && apt-get -y install r-base r-base-core r-recommended r-base-dev \ && apt-get -y install libcurl4-openssl-dev libssl-dev libboost-dev libxml2-dev RUN apt-get -y install r-cran-tidyverse r-cran-plyr r-cran-knitr -RUN apt-get -y install r-cran-plotly r-cran-rcolorbrewer r-cran-htmlwidgets r-cran-shiny r-cran-rmarkdown r-cran-crosstalk r-cran-dt r-cran-reshape2 r-cran-circlize r-cran-viridis r-cran-gridextra r-cran-rcurl +RUN apt-get -y install r-cran-plotly r-cran-rcolorbrewer r-cran-htmlwidgets r-cran-shiny r-cran-rmarkdown r-cran-crosstalk r-cran-dt r-cran-reshape2 r-cran-circlize r-cran-viridis r-cran-gridextra r-cran-rcurl r-cran-cowplot RUN apt-get -y install r-cran-biocmanager r-cran-devtools r-cran-snow r-bioc-limma r-bioc-edger r-bioc-complexheatmap r-bioc-genomicranges r-bioc-summarizedexperiment r-bioc-biocparallel RUN Rscript -e 'install.packages(c("argparse"), repos="http://cran.r-project.org")' RUN Rscript -e 'install.packages(c("flexdashboard"), repos="http://cran.r-project.org")' diff --git a/main.nf b/main.nf index 3009c56..325842a 100644 --- a/main.nf +++ b/main.nf @@ -95,7 +95,7 @@ workflow { } ///Tumor Only Pipelines - if ([params.fastq_input,params.file_input].any() && !params.sample_sheet){ + if ([params.fastq_input,params.fastq_file_input].any() && !params.sample_sheet){ println "Tumor-Only FASTQ" INPUT_TONLY() ALIGN_TONLY(INPUT_TONLY.out.fastqinput,INPUT_TONLY.out.sample_sheet) diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index 4a00109..3b2855d 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -1,13 +1,13 @@ -GENOMEREF=file(params.genomes[params.genome].genome) -SEQUENZAGC=file(params.genomes[params.genome].SEQUENZAGC) -SEQUENZA_SCRIPT=params.script_sequenza +GENOMEREF = file(params.genomes[params.genome].genome) +SEQUENZAGC = file(params.genomes[params.genome].SEQUENZAGC) +SEQUENZA_SCRIPT = params.script_sequenza if (params.genome=="mm10"){ - FREECLENGTHS=file(params.genomes[params.genome].FREEC.FREECLENGTHS) - FREECCHROMS=file(params.genomes[params.genome].FREEC.FREECCHROMS) - 
FREECPILEUP=file(params.genomes[params.genome].FREEC.FREECPILEUP) - FREECSNPS = file(params.genomes[params.genome].FREEC.FREECSNPS) - FREECTARGETS=file(params.genomes[params.genome].intervals) + FREECLENGTHS = params.genomes[params.genome].FREEC.FREECLENGTHS + FREECCHROMS = params.genomes[params.genome].FREEC.FREECCHROMS + FREECPILEUP = params.genomes[params.genome].FREEC.FREECPILEUP + FREECSNPS = params.genomes[params.genome].FREEC.FREECSNPS + FREECTARGETS = params.genomes[params.genome].intervals FREECSCRIPT = params.script_freec FREECPAIR_SCRIPT = params.script_freecpaired FREECSIGNIFICANCE = params.freec_significance @@ -15,19 +15,19 @@ if (params.genome=="mm10"){ } if (params.genome=="hg38" | params.genome=="hg19"){ - GENOMEVER=params.genomes[params.genome].GENOMEVER - GCPROFILE=file(params.genomes[params.genome].GCPROFILE) - GERMLINEHET=file(params.genomes[params.genome].GERMLINEHET) - DIPLODREG=file(params.genomes[params.genome].DIPLODREG) - ENSEMBLCACHE=params.genomes[params.genome].ENSEMBLCACHE - DRIVERS=file(params.genomes[params.genome].DRIVERS) - HOTSPOTS=file(params.genomes[params.genome].HOTSPOTS) + GENOMEVER = params.genomes[params.genome].GENOMEVER + GCPROFILE = file(params.genomes[params.genome].GCPROFILE) + GERMLINEHET = file(params.genomes[params.genome].GERMLINEHET) + DIPLODREG = file(params.genomes[params.genome].DIPLODREG) + ENSEMBLCACHE = params.genomes[params.genome].ENSEMBLCACHE + DRIVERS = file(params.genomes[params.genome].DRIVERS) + HOTSPOTS = file(params.genomes[params.genome].HOTSPOTS) } //mm10 Paired-Sequenza, FREEC-tumor only process seqz_sequenza_bychr { container = "${params.containers.logan}" - label 'process_low' + label 'process_long' input: tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), @@ -53,13 +53,93 @@ process seqz_sequenza_bychr { """ } +process pileup_sequenza { + container = "${params.containers.logan}" + label 'process_low' + input: + tuple val(pairid), val(name), + path(bam), path(bai), path(bed) + output: + tuple val(pairid), path("${name}_${bed}.mpileup.gz"), path("${name}_${bed}.mpileup.gz.tbi") -process sequenza { + script: + //Q20 is default in sequenza + """ + samtools mpileup -f $GENOMEREF -R ${bed} -Q 20 ${bam} |gzip > ${name}_${bed}.mpileup.gz + tabix -s1 -b2 -e2 ${name}_${bed}.mpileup.gz + """ + + stub: + """ + touch "${name}_${bed}.mpileup.gz" + touch "${name}_${bed}.mpileup.gz.tbi" + """ +} + +process seqz_sequenza_reg { container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") + + script: + """ + sequenza-utils bam2seqz \ + -gc ${SEQUENZAGC} \ + -p \ + -F $GENOMEREF \ + -n ${normal} \ + -t ${tumor} | gzip > "${tumorname}_${normalname}_${bed}.seqz.gz" - label 'process_highcpu' + """ + + stub: + """ + touch "${tumorname}_${normalname}_${chr}.seqz.gz" + """ +} + +process seqz_sequenza { + container = "${params.containers.logan}" + label 'process_low' + + input: + tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) + + output: + tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") + + script: + """ + sequenza-utils bam2seqz \ + -gc ${SEQUENZAGC} \ + -p \ + -F $GENOMEREF \ + -n ${normal} \ + -t ${tumor} | gzip > "${tumorname}_${normalname}_${bed}.seqz.gz" + + """ + + stub: + """ + touch 
"${tumorname}_${normalname}_${chr}.seqz.gz" + """ +} + + + + +process sequenza { + container = "${params.containers.logan}" + label 'process_medium' input: tuple val(pairid), path(seqz) @@ -123,7 +203,7 @@ process sequenza { process freec_paired { container = "${params.containers.logan}" - label 'process_highcpu' + label 'process_long' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -318,7 +398,6 @@ process amber_tn { process cobalt_tonly { container = "${params.containers.logan}" - label 'process_medium' input: @@ -350,7 +429,6 @@ process cobalt_tonly { process cobalt_tn { container = "${params.containers.logan}" - label 'process_medium' input: @@ -364,7 +442,7 @@ process cobalt_tn { """ java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ - -tumor ${tumorname} -tumor_bam ${tumorname} \ + -tumor ${tumorname} -tumor_bam ${tumor} \ -reference ${normalname} -reference_bam ${normal} \ -output_dir ${tumorname}_vs_${normalname}_cobalt \ -threads $task.cpus \ @@ -387,7 +465,7 @@ process purple { input: tuple val(tumorname), val(normalname), - path(cobaltin), path(amberin), + path(amberin), path(cobaltin), path(somaticvcf), path(somaticvcfindex) output: @@ -404,9 +482,8 @@ process purple { -gc_profile $GCPROFILE \ -ref_genome_version $GENOMEVER \ -ref_genome $GENOMEREF \ - -ensembl_data_dir $ENSEMBLCACHE \ + $ENSEMBLCACHE \ -somatic_vcf ${somaticvcf} \ - -run_drivers \ -driver_gene_panel $DRIVERS \ -somatic_hotspots $HOTSPOTS \ -threads $task.cpus \ @@ -445,7 +522,7 @@ process purple_novc { -gc_profile $GCPROFILE \ -ref_genome_version $GENOMEVER \ -ref_genome $GENOMEREF \ - -ensembl_data_dir $ENSEMBLCACHE \ + $ENSEMBLCACHE \ -threads $task.cpus \ -output_dir ${tumorname} @@ -483,9 +560,8 @@ process purple_tonly { -gc_profile $GCPROFILE \ -ref_genome_version $GENOMEVER \ -ref_genome $GENOMEREF \ - -ensembl_data_dir $ENSEMBLCACHE \ + $ENSEMBLCACHE \ -somatic_vcf ${somaticvcf} \ - -run_drivers \ -driver_gene_panel $DRIVERS \ -somatic_hotspots $HOTSPOTS \ -threads $task.cpus \ @@ -523,7 +599,7 @@ process purple_tonly_novc { -gc_profile $GCPROFILE \ -ref_genome_version $GENOMEVER \ -ref_genome $GENOMEREF \ - -ensembl_data_dir $ENSEMBLCACHE \ + $ENSEMBLCACHE \ -threads $task.cpus \ -output_dir ${tumorname} """ diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf index 71566e8..1ea049a 100644 --- a/modules/local/structural_variant.nf +++ b/modules/local/structural_variant.nf @@ -7,7 +7,7 @@ INDELREF=file(params.genomes[params.genome].INDELREF) process svaba_somatic { container = "${params.containers.logan}" - label 'process_highcpu' + label 'process_high' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) @@ -58,7 +58,7 @@ process svaba_somatic { process manta_somatic { container = "${params.containers.logan}" - label 'process_highcpu' + label 'process_high' input: tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai) @@ -80,7 +80,7 @@ process manta_somatic { --referenceFasta=$GENOMEREF \ --runDir=wd - wd/runWorkflow.py -m local -j 10 -g 10 + wd/runWorkflow.py -m local -j $task.cpus mv wd/results/variants/diploidSV.vcf.gz ${tumor.simpleName}.diplodSV.vcf.gz mv wd/results/variants/somaticSV.vcf.gz ${tumor.simpleName}.somaticSV.vcf.gz @@ -104,7 +104,7 @@ process annotsv_tn { //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files //Requires bedtools,bcftools container = "${params.containers.annotcnvsv}" - process + input: tuple val(tumorname), path(somaticvcf), 
val(sv) @@ -137,7 +137,7 @@ process annotsv_tn { process manta_tonly { container = "${params.containers.logan}" - label 'process_highcpu' + label 'process_high' input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -158,7 +158,7 @@ process manta_tonly { --referenceFasta=$GENOMEREF \ --runDir=wd - wd/runWorkflow.py -m local -j 10 -g 10 + wd/runWorkflow.py -m local -j $task.cpus mv wd/results/variants/candidateSV.vcf.gz ${tumor.simpleName}.candidateSV.vcf.gz mv wd/results/variants/candidateSmallIndels.vcf.gz ${tumor.simpleName}.candidateSmallIndels.vcf.gz @@ -180,7 +180,7 @@ process manta_tonly { process svaba_tonly { container = "${params.containers.logan}" - label 'process_highcpu' + label 'process_high' input: tuple val(tumorname), path(tumor), path(tumorbai) diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 715ff64..2df6abf 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -42,8 +42,10 @@ process fastp { process bwamem2 { container = "${params.containers.logan}" + label 'process_high' tag { name } + input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index c1cab84..a55b424 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -1,9 +1,9 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) -KGPGERMLINE=params.genomes[params.genome].kgp -DBSNP=file(params.genomes[params.genome].dbsnp) +GERMLINE_RESOURCE=file(params.genomes[params.genome].germline_resource) GNOMADGERMLINE=params.genomes[params.genome].gnomad +DBSNP=file(params.genomes[params.genome].dbsnp) PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies @@ -30,9 +30,9 @@ process mutect2 { output: tuple val(tumorname), val(normalname), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz"), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz"), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats") + path("${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz.stats") script: @@ -42,20 +42,20 @@ process mutect2 { --intervals ${bed} \ --input ${tumor} \ --input ${normal} \ - --normal-sample ${normal.simpleName} \ - --tumor-sample ${tumor.simpleName} \ + --normal-sample ${normalname} \ + --tumor-sample ${tumorname} \ $GNOMADGERMLINE \ --panel-of-normals ${PON} \ - --output ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz \ - --f1r2-tar-gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz \ + --output ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz \ + --f1r2-tar-gz ${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz \ --independent-mates """ stub: """ - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz + touch 
${tumorname}_vs_${normalname}_${bed.simpleName}.f1r2.tar.gz + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.mut2.vcf.gz.stats """ } @@ -69,21 +69,21 @@ process pileup_paired_t { output: tuple val(tumorname), val(normalname), - path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") + path("${tumorname}_${bed.simpleName}.tpileup.table") script: """ gatk --java-options -Xmx48g GetPileupSummaries \ -I ${tumor} \ - -V $KGPGERMLINE \ + -V $GERMLINE_RESOURCE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + -O ${tumorname}_${bed.simpleName}.tpileup.table """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + touch ${tumorname}_${bed.simpleName}.tpileup.table """ } @@ -99,21 +99,21 @@ process pileup_paired_n { output: tuple val(tumorname), val(normalname), - path("${normal.simpleName}_${bed.simpleName}.normal.pileup.table") + path("${normalname}_${bed.simpleName}.npileup.table") script: """ gatk --java-options -Xmx48g GetPileupSummaries \ -I ${normal} \ - -V $KGPGERMLINE \ + -V $GERMLINE_RESOURCE \ -L ${bed} \ - -O ${normalname}_${bed.simpleName}.normal.pileup.table + -O ${normalname}_${bed.simpleName}.npileup.table """ stub: """ - touch ${normalname}_${bed.simpleName}.normal.pileup.table + touch ${normalname}_${bed.simpleName}.npileup.table """ } @@ -263,7 +263,8 @@ process mutect2filter { tuple val("${tumor}_vs_${normal}"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"), - path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), + path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv") script: @@ -304,17 +305,17 @@ process mutect2filter { process strelka_tn { container "${params.containers.logan}" - label 'process_highcpu' + label 'process_high' input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) output: tuple val(tumorname), val(normalname), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz"), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") script: @@ -331,26 +332,26 @@ process strelka_tn { --runDir=wd \ --callRegions ${bed}.gz ./wd/runWorkflow.py -m local -j $task.cpus - mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz - mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz + mv wd/results/variants/somatic.snvs.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz + mv wd/results/variants/somatic.indels.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.indels.vcf.gz printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname - bcftools reheader -s 
sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ - | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ - | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz - bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz """ stub: """ - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz ${tumorname}_vs_${normalname}_${bed.simpleName}.somatic.indels.vcf.gz.tbi """ @@ -366,7 +367,7 @@ process vardict_tn { output: tuple val(tumorname), val(normalname), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz") + path("${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz") //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” script: @@ -386,12 +387,12 @@ process vardict_tn { -d 10 \ -v 6 \ -S \ - -f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf + -f 0.05 > ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname - bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \ - | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz """ @@ -399,7 +400,7 @@ process vardict_tn { stub: """ - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.vardict.vcf.gz """ @@ -419,7 +420,7 @@ process varscan_tn { output: tuple val(tumorname), val(normalname), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz") + path("${tumorname}_vs_${normalname}_${bed.simpleName}.varscan.vcf.gz") shell: ''' @@ -427,29 +428,29 @@ process 
varscan_tn { normal_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l) dual_pileup="samtools mpileup -d 10000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{normal} !{tumor}" varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity" - varscan_cmd="varscan somatic <($dual_pileup) !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1" + varscan_cmd="varscan somatic <($dual_pileup) !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1" eval "$varscan_cmd" - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \ - | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz - awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \ - | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.indel \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.indel_temp.vcf.gz + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.snp \ + | sed '/^$/d' | bcftools view - -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.snp_temp.vcf.gz - gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ - -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \ + gatk SortVcf -I !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ + -I !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \ -R !{GENOMEREF} -SD !{GENOMEDICT} \ - -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf + -O !{tumorname}_vs_!{normalname}_!{bed.simpleName}_temp.varscan.vcf printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname - bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \ - | bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz + bcftools reheader -s sampname !{tumorname}_vs_!{normalname}_!{bed.simpleName}_temp.varscan.vcf \ + | bcftools view -Oz -o !{tumorname}_vs_!{normalname}_!{bed.simpleName}.varscan.vcf.gz ''' stub: """ - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz + touch ${tumorname}_vs_${normalname}_${bed.simpleName}.varscan.vcf.gz """ } @@ -488,12 +489,12 @@ process octopus_tn { process sage_tn { - container "${params.containers.hmftools}" - label 'process_somaticcaller' + container "${params.containers.logan}" + label 'process_high' input: - tuple val(tumorname), path(tumor), path(tumorbai), - val(normalname), path(normal), path(normalbai) + tuple val(tumorname), path(tumorbam), path(tumorbai), + val(normalname), path(normalbam), path(normalbai) output: tuple val(tumorname), val(normalname), @@ -502,13 +503,14 @@ process sage_tn { script: """ - java -Xms4G -Xmx32G -cp sage.jar 
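[annotation] The varscan_tn shell block above derives tumor and normal purity as 1 minus the contamination estimate read from the last row, second column of each contamination table. A small Python sketch of that arithmetic, assuming the usual GATK CalculateContamination layout (header row, then sample, contamination, error); the numbers are invented:

    # Sketch of the purity arithmetic in varscan_tn above: purity = 1 - contamination,
    # taken from the last row / second column of the table (tail -n -1 ... | cut -f2).
    import csv, io

    example_table = "sample\tcontamination\terror\nTUMOR_A\t0.012345\t0.0021\n"

    def purity_from_table(text):
        rows = list(csv.reader(io.StringIO(text), delimiter="\t"))
        contamination = float(rows[-1][1])
        return round(1.0 - contamination, 6)   # mirrors printf '%.6f' ... | bc -l

    print(purity_from_table(example_table))    # -> 0.987655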
com.hartwig.hmftools.sage.SageApplication \ + java -Xms4G -Xmx32G -cp /opt2/hmftools/sage.jar com.hartwig.hmftools.sage.SageApplication \ -tumor ${tumorname} -tumor_bam ${tumorbam} \ -reference ${normalname} -reference_bam ${normalbam} \ -threads $task.cpus \ -ref_genome_version $GENOMEVER \ -ref_genome $GENOMEREF \ - $HOTSPOTS $PANELBED $HCBED $ENSEMBLCACHE \ + -hotspots $HOTSPOTS \ + $PANELBED $HCBED $ENSEMBLCACHE \ -output_vcf ${tumorname}_vs_${normalname}.sage.vcf.gz """ @@ -682,22 +684,46 @@ process combineVariants_alternative { script: vcfin = vcfs.join(" ") - """ - mkdir ${vc} - bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz - bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf - bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + + if (vc.contains("octopus")) { + """ + mkdir ${vc} + bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz + bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf + bcftools sort ${sample}.${vc}.temp.vcf | bcftools view - -i "INFO/SOMATIC==1" -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf - bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + mv ${sample}.${vc}.marked.vcf.gz ${vc} - mv ${sample}.${vc}.marked.vcf.gz ${vc} + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t + """ + + }else{ + """ + mkdir ${vc} + bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz + bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf + bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ + awk '{{gsub(/\\y[W|K|Y|R|S|M|B|D|H|V]\\y/,"N",\$4); OFS = "\t"; print}}' |\ + sed '/^\$/d' > ${sample}.${vc}.temp.vcf + + bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz + mv ${sample}.${vc}.marked.vcf.gz ${vc} + + bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t + """ + } - bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t - bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t - """ + + + + stub: @@ -831,7 +857,33 @@ process somaticcombine { } +process ffpe_1 { + container "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(tumorsample), val(normal), + path("${tumorsample}_vs_${normal}_combined.vcf.gz"), + path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi"), + bam, bamindex + tuple val(tumorsample), val(normal), + val(caller), + path(vcfs), path(vcfindex) + + output: + tuple val(tumorsample), val(normal), + path("${tumorsample}_vs_${normal}_combined.vcf.gz"), + path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") + + script: + + + stub: + """ + touch "${tumorsample}_vs_${normal}_combined_ffpolish.vcf.gz" + """ +} /*DISCVR process somaticcombine { container "${params.containers.logan}" diff --git a/modules/local/variant_calling_tonly.nf 
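[annotation] The new octopus branch in combineVariants_alternative above restricts records to those flagged INFO/SOMATIC==1 before the usual normalise-and-PASS filtering, so a call survives only if it is both somatic-flagged and PASS. A toy line-level illustration of that two-stage filter (the records are invented):

    # Toy illustration of the two-stage octopus filtering above:
    # stage 1 keeps INFO/SOMATIC records, stage 2 keeps FILTER == PASS.
    records = [  # (CHROM, POS, FILTER, INFO), invented examples
        ("chr1", 12345, "PASS", "AC=1;SOMATIC"),
        ("chr1", 22345, "PASS", "AC=2"),          # not somatic-flagged -> dropped at stage 1
        ("chr2", 51000, "q10",  "AC=1;SOMATIC"),  # somatic but not PASS -> dropped at stage 2
    ]

    somatic = [r for r in records if "SOMATIC" in r[3].split(";")]
    passing = [r for r in somatic if r[2] == "PASS"]
    print(passing)  # only chr1:12345 remains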
b/modules/local/variant_calling_tonly.nf index 0ec9762..4e0991e 100644 --- a/modules/local/variant_calling_tonly.nf +++ b/modules/local/variant_calling_tonly.nf @@ -1,9 +1,9 @@ GENOMEREF=file(params.genomes[params.genome].genome) GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) -KGPGERMLINE=params.genomes[params.genome].kgp -DBSNP=file(params.genomes[params.genome].dbsnp) +GERMLINE_RESOURCE=file(params.genomes[params.genome].germline_resource) GNOMADGERMLINE=params.genomes[params.genome].gnomad +DBSNP=file(params.genomes[params.genome].dbsnp) PON=file(params.genomes[params.genome].pon) VEPCACHEDIR=file(params.genomes[params.genome].vepcache) VEPSPECIES=params.genomes[params.genome].vepspecies @@ -29,22 +29,22 @@ process pileup_paired_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") + path("${tumorname}_${bed.simpleName}.tpileup.table") script: """ gatk --java-options -Xmx48g GetPileupSummaries \ -I ${tumor} \ - -V $KGPGERMLINE \ + -V $GERMLINE_RESOURCE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + -O ${tumorname}_${bed.simpleName}.tpileup.table """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table + touch ${tumorname}_${bed.simpleName}.tpileup.table """ @@ -123,7 +123,6 @@ process learnreadorientationmodel_tonly { process mergemut2stats_tonly { container "${params.containers.logan}" - label 'process_low' input: @@ -159,9 +158,9 @@ process mutect2_t_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz"), - path("${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz"), - path("${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz.stats") + path("${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz"), + path("${tumorname}_${bed.simpleName}.f1r2.tar.gz"), + path("${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz.stats") script: @@ -170,19 +169,19 @@ process mutect2_t_tonly { --reference $GENOMEREF \ --intervals ${bed} \ --input ${tumor} \ - --tumor-sample ${tumor.simpleName} \ + --tumor-sample ${tumorname} \ $GNOMADGERMLINE \ --panel-of-normals $PON \ - --output ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz \ - --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \ + --output ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz \ + --f1r2-tar-gz ${tumorname}_${bed.simpleName}.f1r2.tar.gz \ --independent-mates """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz - touch ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz - touch ${tumor.simpleName}_${bed.simpleName}.tonly.mut2.vcf.gz.stats + touch ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz + touch ${tumorname}_${bed.simpleName}.f1r2.tar.gz + touch ${tumorname}_${bed.simpleName}.tonly.mut2.vcf.gz.stats """ @@ -208,8 +207,7 @@ process mutect2filter_tonly { """ - gatk GatherVcfs -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz - gatk IndexFeatureFile -I ${sample}.tonly.concat.vcf.gz + gatk SortVcf -I ${mut2in} -O ${sample}.tonly.concat.vcf.gz --CREATE_INDEX gatk FilterMutectCalls \ -R $GENOMEREF \ -V ${sample}.tonly.concat.vcf.gz \ @@ -253,7 +251,7 @@ process varscan_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz") + path("${tumorname}_${bed.simpleName}.tonly.varscan.vcf.gz") shell: @@ -276,7 +274,7 @@ process varscan_tonly { stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz + touch 
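[annotation] Replacing GatherVcfs plus IndexFeatureFile with a single SortVcf --CREATE_INDEX call above likely reflects that the scattered per-interval Mutect2 chunks are not guaranteed to arrive in reference order; SortVcf coordinate-sorts them and writes the index in one step. A toy sketch of the coordinate sort being relied on (contig order and positions are invented):

    # Toy sketch of coordinate-sorting concatenated per-interval VCF records,
    # which is what SortVcf provides above; contigs/positions are invented.
    contig_rank = {name: i for i, name in enumerate(["chr1", "chr2", "chr3"])}

    records = [  # (CHROM, POS) pairs pulled from different interval chunks
        ("chr2", 1500),
        ("chr1", 90000),
        ("chr1", 1200),
        ("chr3", 55),
    ]

    records.sort(key=lambda r: (contig_rank[r[0]], r[1]))
    print(records)  # [('chr1', 1200), ('chr1', 90000), ('chr2', 1500), ('chr3', 55)]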
${tumorname}_${bed.simpleName}.tonly.varscan.vcf.gz """ } @@ -291,7 +289,7 @@ process vardict_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz") + path("${tumorname}_${bed.simpleName}.tonly.vardict.vcf.gz") script: @@ -388,11 +386,11 @@ process octopus_convertvcf_tonly { process sage_tonly { - container "${params.containers.hmftools}" + container "${params.containers.logan}" label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai) + tuple val(tumorname), path(tumorbam), path(tumorbai) output: tuple val(tumorname), @@ -401,12 +399,13 @@ process sage_tonly { script: """ - java -Xms4G -Xmx32G -cp sage.jar com.hartwig.hmftools.sage.SageApplication \ + java -Xms4G -Xmx32G -cp /opt2/hmftools/sage.jar com.hartwig.hmftools.sage.SageApplication \ -tumor ${tumorname} -tumor_bam ${tumorbam} \ -threads $task.cpus \ -ref_genome_version $GENOMEVER \ -ref_genome $GENOMEREF \ - $HOTSPOTS $PANELBED $HCBED $ENSEMBLCACHE \ + -hotspots $HOTSPOTS \ + $PANELBED $HCBED $ENSEMBLCACHE \ -output_vcf ${tumorname}.tonly.sage.vcf.gz """ @@ -436,7 +435,7 @@ process somaticcombine_tonly { vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } vcfin2="-V:" + vcfin1.join(" -V:") - callerin=caller.join(",").replaceAll("_tonly","") + callerin=caller.join(",")//.replaceAll("_tonly","") """ /usr/lib/jvm/java-8-openjdk-amd64/bin/java -jar \$GATK_JAR -T CombineVariants \ @@ -450,9 +449,9 @@ process somaticcombine_tonly { stub: - vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - callerin=caller.join(",").replaceAll("_tonly","") + vcfin1=[caller, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") + callerin=caller.join(",")//.replaceAll("_tonly","") """ touch ${tumorsample}_combined_tonly.vcf.gz ${tumorsample}_combined_tonly.vcf.gz.tbi diff --git a/nextflow.config b/nextflow.config index 6a35323..9f5c5c6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,8 +40,10 @@ params { cnv=null qc=null bam=null + align=null indelrealign=null - + no_tonly=null + //Set all Inputs to null sample_sheet=null fastq_file_input=null @@ -53,8 +55,10 @@ params { BAMINPUT=null callers = "mutect2,octopus,lofreq,muse,sage,vardict,varscan" - cnvcallers= "purple,sequenza,freec" - + tonlycallers = "mutect2,octopus,vardict,varscan" + cnvcallers = "purple,sequenza,freec" + svcallers = "manta,svaba" + intervals = null publish_dir_mode = 'symlink' outdir = 'results' diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 094283a..e7d9956 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -100,7 +100,12 @@ workflow ALIGN { main: fastp(fastqinput) - intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + if (params.intervals){ + intervalbedin = Channel.fromPath(params.intervals) + }else{ + intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + } + splitinterval(intervalbedin) bwamem2(fastp.out) @@ -177,7 +182,9 @@ workflow VC { //Prep Pileups call_list = params.callers.split(',') as List - + call_list_tonly = params.tonlycallers.split(',') as List + call_list_tonly = call_list.intersect(call_list_tonly) + vc_all=Channel.empty() vc_tonly=Channel.empty() @@ -189,26 +196,28 @@ workflow VC { pileup_paired_t.out.groupTuple(by:[0,1]) | multiMap { samplename, normalname, pileups -> tout: tuple( samplename, 
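[annotation] The new tonlycallers parameter above is intersected with the existing callers list, limiting tumor-only calling and the tumor-only combination step to callers present in both. A quick Python sketch of that selection, using the default strings added to nextflow.config:

    # Sketch of how the caller lists added above combine; the defaults are the
    # params.callers / params.tonlycallers strings from nextflow.config.
    callers = "mutect2,octopus,lofreq,muse,sage,vardict,varscan".split(",")
    tonlycallers = "mutect2,octopus,vardict,varscan".split(",")

    call_list = list(callers)
    call_list_tonly = [c for c in call_list if c in tonlycallers]  # Groovy intersect()

    print(call_list_tonly)           # ['mutect2', 'octopus', 'vardict', 'varscan']
    print(len(call_list_tonly) > 1)  # the size()>1 check before combining tumor-only calls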
normalname, - pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ) + pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tpileup.table/)[0][1].toInteger() } ) tonly: tuple( samplename, - pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ) + pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tpileup.table/)[0][1].toInteger() } ) } | set{pileup_paired_tout} pileup_paired_n.out.groupTuple(by:[0,1]) | multiMap { samplename, normalname, pileups-> nout: tuple (samplename,normalname, - pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).normal.pileup.table/)[0][1].toInteger() } ) + pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).npileup.table/)[0][1].toInteger() } ) nonly: tuple (normalname, - pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).normal.pileup.table/)[0][1].toInteger() } ) + pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).npileup.table/)[0][1].toInteger() } ) } | set{pileup_paired_nout} pileup_paired_match=pileup_paired_tout.tout.join(pileup_paired_nout.nout,by:[0,1]) contamination_paired(pileup_paired_match) + if (!params.no_tonly){ pileup_all=pileup_paired_tout.tonly.concat(pileup_paired_nout.nonly) contamination_tumoronly(pileup_all) + } } if ("mutect2" in call_list){ @@ -238,17 +247,20 @@ workflow VC { | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} annotvep_tn_mut2(mutect2_in) + vc_all = vc_all|concat(mutect2_in) + //Mutect2 Tumor Only + if (!params.no_tonly){ mutect2_t_tonly(bambyinterval_t) - mutect2_t_tonly.out.groupTuple() - | multiMap { tumor,vcfs,f1r2,stats -> - mut2tout_lor: tuple(tumor, - f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) - mut2tonly_mstats: tuple( tumor, - stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) - allmut2tonly: tuple(tumor, - vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) + | groupTuple() + | multiMap { tumorid,vcfs,f1r2,stats -> + mut2tout_lor: tuple(tumorid, + f1r2.toSorted{ it -> (it.name =~ /${tumorid}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2tonly_mstats: tuple( tumorid, + stats.toSorted{ it -> (it.name =~ /${tumorid}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tonly: tuple(tumorid, + vcfs.toSorted{ it -> (it.name =~ /${tumorid}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) } | set{mut2tonlyout} @@ -264,10 +276,10 @@ workflow VC { | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)} annotvep_tonly_mut2(mutect2_in_tonly) - - vc_all = vc_all|concat(mutect2_in) vc_tonly = vc_tonly | concat(mutect2_in_tonly) - + } + + } if ("strelka" in call_list){ @@ -293,7 +305,10 @@ workflow VC { | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} annotvep_tn_vardict(vardict_in) + vc_all=vc_all|concat(vardict_in) + //VarDict TOnly + if (!params.no_tonly){ vardict_in_tonly=vardict_tonly(bambyinterval_t) | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} @@ -301,8 +316,9 @@ workflow VC { | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} annotvep_tonly_vardict(vardict_in_tonly) - vc_all=vc_all|concat(vardict_in) 
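[annotation] The regexes above are updated to match the new .tpileup/.npileup suffixes while still sorting the scattered per-interval tables by their numeric chunk index before gathering. A small Python sketch of that sort (the file names are invented, following the <sample>_<interval>.tpileup.table pattern):

    # Sketch of the numeric sort applied above to scattered pileup tables,
    # so chunk 10 does not sort ahead of chunk 2; the file names are invented.
    import re

    files = ["SAMPLE_10.tpileup.table", "SAMPLE_2.tpileup.table", "SAMPLE_1.tpileup.table"]

    def chunk_index(name, samplename="SAMPLE"):
        return int(re.search(rf"{samplename}_(.*?).tpileup.table", name).group(1))

    print(sorted(files, key=chunk_index))
    # ['SAMPLE_1.tpileup.table', 'SAMPLE_2.tpileup.table', 'SAMPLE_10.tpileup.table']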
vc_tonly=vc_tonly|concat(vardict_in_tonly) + } + } if ("varscan" in call_list){ @@ -314,6 +330,10 @@ workflow VC { | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} annotvep_tn_varscan(varscan_in) + vc_all=vc_all|concat(varscan_in) + + + if (!params.no_tonly){ //VarScan TOnly varscan_in_tonly=bambyinterval_t.combine(contamination_tumoronly.out,by:0) | varscan_tonly | groupTuple @@ -323,11 +343,11 @@ workflow VC { | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} annotvep_tonly_varscan(varscan_in_tonly) - vc_all=vc_all|concat(varscan_in) vc_tonly=vc_tonly|concat(varscan_in_tonly) - } - + } + + } //SAGE TN if ("sage" in call_list){ @@ -338,6 +358,9 @@ workflow VC { | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"sage",normvcf,normindex)} annotvep_tn_sage(sage_in) + vc_all=vc_all | concat(sage_in) + + if (!params.no_tonly){ sage_in_tonly=bamwithsample | map{tumor,tbam,tbai,norm,nbam,nbai -> tuple(tumor,tbam,tbai)} | sage_tonly | map{samplename,vcf,vcfindex->tuple(samplename,vcf,"sage_tonly")} @@ -345,13 +368,10 @@ workflow VC { | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"sage_tonly",normvcf,normindex)} annotvep_tonly_sage(sage_in_tonly) - - vc_all=vc_all | concat(sage_in) vc_tonly=vc_tonly | concat(sage_in_tonly) - + } } - //Lofreq TN if ("lofreq" in call_list){ lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) @@ -386,8 +406,10 @@ workflow VC { annotvep_tn_octopus(octopus_in) octopus_in_sc = octopus_in | octopus_convertvcf | map{tumor,normal,vcf,vcfindex ->tuple(tumor,normal,"octopus",vcf,vcfindex)} + vc_all=vc_all|concat(octopus_in_sc) //Octopus TOnly + if (!params.no_tonly){ octopus_in_tonly=octopus_tonly(bambyinterval_t) | bcftools_index_octopus_tonly | groupTuple() @@ -398,35 +420,36 @@ workflow VC { annotvep_tonly_octopus(octopus_in_tonly) octopus_in_tonly_sc=octopus_in_tonly | octopus_convertvcf_tonly | map{tumor,vcf,vcfindex ->tuple(tumor,"octopus_tonly",vcf,vcfindex)} - - vc_all=vc_all|concat(octopus_in_sc) - vc_tonly=vc_tonly|concat(octopus_in_tonly) + vc_tonly=vc_tonly|concat(octopus_in_tonly_sc) + } + } //Combine All Variants Using VCF -> Annotate if (call_list.size()>1){ - somaticcall_input=vc_all | groupTuple(by:[0,1]) + vc_all | groupTuple(by:[0,1]) | somaticcombine | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} - somaticcall_input | annotvep_tn_combined - }else if ("octopus" in call_list){ - somaticcall_input=octopus_in_sc - }else if("mutect2" in call_list){ - somaticcall_input=mutect2_in - }else if("sage" in call_list){ - somaticcall_input=sage_in - } + | annotvep_tn_combined - - if (call_list.size()>1){ + if (!params.no_tonly & call_list_tonly.size()>1){ vc_tonly | groupTuple() | somaticcombine_tonly | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} | annotvep_tonly_combined + } + } + if("sage" in call_list){ + somaticcall_input=sage_in + }else if("mutect2" in call_list){ + somaticcall_input=mutect2_in + }else{ + somaticcall_input=Channel.empty() } + //Implement PCGR Annotator/CivIC Next emit: somaticcall_input @@ -439,24 +462,29 @@ workflow SV { bamwithsample main: - //Svaba - svaba_out=svaba_somatic(bamwithsample) - .map{ tumor,bps,contigs,discord,alignents,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log -> - tuple(tumor,so_sv,"svaba")} - annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV 
annotated") + svcall_list = params.svcallers.split(',') as List + if ("svaba" in svcall_list){ + //Svaba + svaba_out=svaba_somatic(bamwithsample) + .map{ tumor,bps,contigs,discord,alignents,gindel,gsv,so_indel,so_sv,unfil_gindel,unfil_gsv,unfil_so_indel,unfil_sv,log -> + tuple(tumor,so_sv,"svaba")} + annotsv_svaba(svaba_out).ifEmpty("Empty SV input--No SV annotated") + } + if ("manta" in svcall_list){ //Manta manta_out=manta_somatic(bamwithsample) .map{tumor,gsv,so_sv,unfil_sv,unfil_indel -> tuple(tumor,so_sv,"manta")} annotsv_manta(manta_out).ifEmpty("Empty SV input--No SV annotated") - + } //Delly-WIP - //Survivor - gunzip(manta_out).concat(svaba_out).groupTuple() - | survivor_sv | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated") - + if ("manta" in svcall_list & "svaba" in svcall_list){ + //Survivor + gunzip(manta_out).concat(svaba_out).groupTuple() + | survivor_sv | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated") + } } workflow CNVmouse { @@ -698,11 +726,15 @@ workflow INPUT_BAM { tuple(sample, file(bam),file(bai)) } } - intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + if (params.intervals){ + intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file') + }else{ + intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + } splitinterval(intervalbedin) if (params.indelrealign){ - bqsrs= baminputonly | indelrealign | combine(splitinterval.out.flatten()) + bqsrs = baminputonly | indelrealign | combine(splitinterval.out.flatten()) | bqsr_ir | groupTuple | map { samplename,beds -> diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 5c0fdfb..d691039 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -48,32 +48,26 @@ include {splitinterval} from '../../modules/local/splitbed.nf' workflow INPUT_TONLY { if(params.fastq_input){ fastqinput=Channel.fromFilePairs(params.fastq_input) - - }else if(params.fastq_file_input) { - fastqinput=Channel.fromPath(params.fastq_file_input) + }else if(params.fastq_file_input){ + fastqinput=Channel.fromPath(params.fastq_file_input).view() .splitCsv(header: false, sep: "\t", strip:true) .map{ sample,fq1,fq2 -> - tuple(sample, tuple(file(fq1),file(fq2))) - } + tuple(sample, tuple(file(fq1),file(fq2))) + } } - - if(params.sample_sheet){ + if(params.sample_sheet){ sample_sheet=Channel.fromPath(params.sample_sheet, checkIfExists: true) - .ifEmpty { "sample sheet not found" } + .ifEmpty { "Sample sheet not found" } .splitCsv(header:true, sep: "\t") .map { row -> tuple( row.Tumor - ) - } - + )} }else{ sample_sheet=fastqinput.map{samplename,f1 -> tuple ( samplename)} } - - emit: fastqinput sample_sheet @@ -87,8 +81,12 @@ workflow ALIGN_TONLY { main: fastp(fastqinput) - intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') - splitinterval(intervalbedin) + if (params.intervals){ + intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file') + }else{ + intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file') + } + splitinterval(intervalbedin) bwamem2(fastp.out) //indelrealign(bwamem2.out) Consider indelreaglinement using ABRA? 
@@ -134,7 +132,10 @@ workflow VC_TONLY {
        bambyinterval=bamwithsample.combine(splitout.flatten())

    //Common steps
+    //Ensure that Tumor Only callers are included
    call_list = params.callers.split(',') as List
+    call_list_tonly = params.tonlycallers.split(',') as List
+    call_list = call_list.intersect(call_list_tonly)
    vc_tonly=Channel.empty()

@@ -142,7 +143,7 @@ workflow VC_TONLY {
        pileup_paired_tonly(bambyinterval)
        pileup_paired_tout=pileup_paired_tonly.out.groupTuple()
            .map{samplename,pileups-> tuple( samplename,
-                pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ,
+                pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tpileup.table/)[0][1].toInteger() } ,
            )}
        contamination_tumoronly(pileup_paired_tout)
    }

@@ -233,24 +234,21 @@ workflow VC_TONLY {
    //Emit for SC downstream, take Oc/Mu2/sage/Vard/Varscan
    if (call_list.size()>1){
-        somaticcall_input=vc_tonly
+        vc_tonly
            | groupTuple()
            | somaticcombine_tonly
            | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)}
-        somaticcall_input | annotvep_tonly_combined
-    }else if("octopus" in call_list){
-        somaticcall_input=octopus_in_tonly_sc
+            | annotvep_tonly_combined
+    }
+
+    if("sage" in call_list){
+        somaticcall_input=sage_in_tonly
    }else if("mutect2" in call_list){
        somaticcall_input=mutect2_in_tonly
-    }else if("sage" in call_list){
-        somaticcall_input=sage_in_tonly
-    }else if("vardict" in call_list){
-        somaticcall_input=vardict_in_tonly
-    }else if("varscan" in call_list){
-        somaticcall_input=varscan_in_tonly
+    }else{
+        somaticcall_input=Channel.empty()
    }
-
-    //Emit for SC downstream, take Combined/Oc/Mu2/Vard/Varscan
+
    emit:
        somaticcall_input
}

@@ -428,7 +426,11 @@ workflow INPUT_TONLY_BAM {
        sample_sheet=baminputonly.map{samplename,bam,bai -> tuple ( samplename)}
    }

-    intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
+    if (params.intervals){
+        intervalbedin = Channel.fromPath(params.intervals,checkIfExists: true,type: 'file')
+    }else{
+        intervalbedin = Channel.fromPath(params.genomes[params.genome].intervals,checkIfExists: true,type: 'file')
+    }
    splitinterval(intervalbedin)

    bamwithsample=baminputonly
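The caller intersection introduced in VC_TONLY above, call_list = call_list.intersect(call_list_tonly), means that any caller requested through params.callers but absent from params.tonlycallers is silently dropped from the tumor-only run. A standalone Groovy sketch with hypothetical parameter values:

// Hypothetical values for params.callers and params.tonlycallers
def call_list       = "mutect2,strelka,lofreq,sage".split(',') as List
def call_list_tonly = "mutect2,sage,vardict,varscan,octopus".split(',') as List

// With these example lists, strelka and lofreq drop out of the tumor-only run
assert call_list.intersect(call_list_tonly) == ["mutect2", "sage"]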