From 0b10663256ddb41ff936e8462e15606ee5d7c6f1 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Sun, 7 May 2023 02:50:12 +0200 Subject: [PATCH 01/34] Update methylseq.nf --- workflows/methylseq.nf | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 21ad4d37..fc4c18bf 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -75,6 +75,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft include { TRIMGALORE } from '../modules/nf-core/trimgalore/main' include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' +include { MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -137,6 +138,8 @@ workflow METHYLSEQ { ch_cat_fastq ) versions = versions.mix(FASTQC.out.versions.first()) + + /* * MODULE: Run TrimGalore! @@ -154,6 +157,10 @@ workflow METHYLSEQ { /* * SUBWORKFLOW: Align reads, deduplicate and extract methylation with Bismark */ + + /* + * SUBWORKFLOW: Align reads, deduplicate and extract methylation with Bismark + */ // Aligner: bismark or bismark_hisat if( params.aligner =~ /bismark/ ){ @@ -172,6 +179,12 @@ workflow METHYLSEQ { ch_dedup = BISMARK.out.dedup ch_aligner_mqc = BISMARK.out.mqc } + // SUBWORKFLOW: delete mark duplicates + + MARKDUPLICATES ( + ch_bam + ) + // Aligner: bwameth else if ( params.aligner == 'bwameth' ){ From 2a324cef679ac13efa2eb93d19175532df6388b9 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 11 May 2023 14:27:16 +0200 Subject: [PATCH 02/34] Update methylseq.nf Addition of Markduplicates module for each aligner --- workflows/methylseq.nf | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index fc4c18bf..322f381e 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -179,12 +179,19 @@ workflow METHYLSEQ { ch_dedup = BISMARK.out.dedup ch_aligner_mqc = BISMARK.out.mqc } - // SUBWORKFLOW: delete mark duplicates - - MARKDUPLICATES ( - ch_bam + /* + * Run Mark Duplicates on the BAM file + */ + MARK_DUPLICATES ( + ch_bam, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai ) - + ch_bam = MARK_DUPLICATES.out.bam + ch_bai = MARK_DUPLICATES.out.bai + ch_markduplicates_flagstat_multiqc = MARK_DUPLICATES.out.flagstat + ch_versions = ch_versions.mix(MARK_DUPLICATES.out.versions) +} // Aligner: bwameth else if ( params.aligner == 'bwameth' ){ @@ -200,7 +207,23 @@ workflow METHYLSEQ { ch_dedup = BWAMETH.out.dedup ch_aligner_mqc = BWAMETH.out.mqc } + /* + * Run Mark Duplicates on the BAM file + */ + + MARK_DUPLICATES ( + ch_bam, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai + ) + + ch_bam = MARK_DUPLICATES.out.bam + ch_bai = MARK_DUPLICATES.out.bai + ch_markduplicates_flagstat_multiqc = MARK_DUPLICATES.out.flagstat + ch_versions = ch_versions.mix(MARK_DUPLICATES.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_markduplicates_flagstat_multiqc) +} /* * MODULE: Qualimap BamQC */ From c4d67d5569d5ad13d64d798d2b1ffd74ea0f1bfc Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 11 May 2023 19:19:23 +0200 Subject: [PATCH 03/34] Update methylseq.nf Modification: HS metrics --- workflows/methylseq.nf | 15 +++++++++++++++ 1 
file changed, 15 insertions(+) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 322f381e..149c9d07 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -76,6 +76,7 @@ include { TRIMGALORE } from '../modules/nf-core/trimgalore/main include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' include { MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' +include {PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -224,6 +225,20 @@ workflow METHYLSEQ { ch_multiqc_files = ch_multiqc_files.mix(ch_markduplicates_flagstat_multiqc) } + +/* + * SUBWORKFLOW: Collect hybrid selection metrics with Picard + */ +def minMQ = params.min_mapping_quality ?: 20 +MARKDUPLICATES(reads) +PICARD_COLLECTHSMETRICS( + reference_genome: PREPARE_GENOME.out.reference_genome, + intervals: params.intervals ? file(params.intervals, checkIfExists: true) : null, + minimum_mapping_quality: minMQ +) +.set { ch_hsmetrics } + + /* * MODULE: Qualimap BamQC */ From b978e52e52db4217dd38a114d29b9d2655702fe3 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Mon, 15 May 2023 13:12:50 +0200 Subject: [PATCH 04/34] Create collecthsmetrics collecthsmetrics module content --- modules/nf-core/collecthsmetrics | 82 ++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 modules/nf-core/collecthsmetrics diff --git a/modules/nf-core/collecthsmetrics b/modules/nf-core/collecthsmetrics new file mode 100644 index 00000000..d721bc6f --- /dev/null +++ b/modules/nf-core/collecthsmetrics @@ -0,0 +1,82 @@ +process PICARD_COLLECTHSMETRICS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*_metrics") , emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + def bait_interval_list = bait_intervals + def bait_intervallist_cmd = "" + if (bait_intervals =~ /.(bed|bed.gz)$/){ + bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
+ } + + def target_interval_list = target_intervals + def target_intervallist_cmd = "" + if (target_intervals =~ /.(bed|bed.gz)$/){ + target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." + } + + + """ + + $bait_intervallist_cmd + $target_intervallist_cmd + + picard \\ + -Xmx${avail_mem}M \\ + CollectHsMetrics \\ + $args \\ + $reference \\ + --BAIT_INTERVALS $bait_interval_list \\ + --TARGET_INTERVALS $target_interval_list \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectHsMetrics.coverage_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} From ee479d77d7c250f358e2f12638a977ee45c7d39d Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Mon, 15 May 2023 13:14:13 +0200 Subject: [PATCH 05/34] Delete collecthsmetrics --- modules/nf-core/collecthsmetrics | 82 -------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 modules/nf-core/collecthsmetrics diff --git a/modules/nf-core/collecthsmetrics b/modules/nf-core/collecthsmetrics deleted file mode 100644 index d721bc6f..00000000 --- a/modules/nf-core/collecthsmetrics +++ /dev/null @@ -1,82 +0,0 @@ -process PICARD_COLLECTHSMETRICS { - tag "$meta.id" - label 'process_single' - - conda "bioconda::picard=3.0.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" - - input: - tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(dict) - - output: - tuple val(meta), path("*_metrics") , emit: metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - - def avail_mem = 3072 - if (!task.memory) { - log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - - def bait_interval_list = bait_intervals - def bait_intervallist_cmd = "" - if (bait_intervals =~ /.(bed|bed.gz)$/){ - bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
- } - - def target_interval_list = target_intervals - def target_intervallist_cmd = "" - if (target_intervals =~ /.(bed|bed.gz)$/){ - target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." - } - - - """ - - $bait_intervallist_cmd - $target_intervallist_cmd - - picard \\ - -Xmx${avail_mem}M \\ - CollectHsMetrics \\ - $args \\ - $reference \\ - --BAIT_INTERVALS $bait_interval_list \\ - --TARGET_INTERVALS $target_interval_list \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.CollectHsMetrics.coverage_metrics - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ -} From 5033f8dcda59878ac10d6665c70c9b1b1501ac1d Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 17 May 2023 14:08:41 +0200 Subject: [PATCH 06/34] Update methylseq.nf --- workflows/methylseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 149c9d07..c7e51552 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -75,8 +75,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft include { TRIMGALORE } from '../modules/nf-core/trimgalore/main' include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' -include { MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' -include {PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' +include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' +include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From c26e5c04dd934d2b0fdd3081ea1c68607a6cffcc Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Sat, 20 May 2023 14:56:05 +0200 Subject: [PATCH 07/34] Update methylseq.nf Include module twice: Qualimap (Before and After Markduplicates in the BAM file) Add module Qualimap into the workflow twice (QUALIMAP and QUALIMAP2 The syntax of markduplicates module was corrected. HS metrics was also corrected with an else statment to avoid the use of the module in case that BED file is not included as an input. 
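Note on the DSL2 pattern this commit relies on: a module can be imported twice under different names and then invoked once on the raw alignments and once on the deduplicated ones. A minimal sketch of that wiring, assuming the Qualimap module path already used by this pipeline (the workflow name and channel names below are illustrative only):

    include { QUALIMAP_BAMQC                    } from '../modules/nf-core/qualimap/bamqc/main'
    include { QUALIMAP_BAMQC as QUALIMAP_BAMQC2 } from '../modules/nf-core/qualimap/bamqc/main'

    workflow QC_SKETCH {
        take:
        ch_bam      // [ meta, bam ] as produced by the aligner
        ch_dedup    // [ meta, bam ] after duplicate marking

        main:
        QUALIMAP_BAMQC  ( ch_bam,   [] )   // BamQC before MarkDuplicates
        QUALIMAP_BAMQC2 ( ch_dedup, [] )   // BamQC again after MarkDuplicates

        // optional steps can be guarded on a user-supplied BED file
        if ( params.target_interval ) {
            // interval-based metrics would be invoked here
        }
    }

The alias is needed because a process name can only be bound once per importing script; without `as QUALIMAP_BAMQC2` the second include would clash with the first.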
--- workflows/methylseq.nf | 109 ++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 56 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index c7e51552..4cbdd227 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -1,3 +1,4 @@ +params.target_interval = null /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE INPUTS @@ -18,14 +19,15 @@ def checkPathParamList = [ params.bwa_meth_index, params.bismark_index, params.known_splices, + params.target_interval ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - - +// Check if target interval BED file is provided +def targetIntervalProvided = params.target_interval ? true : false /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -68,15 +70,16 @@ else if ( params.aligner == 'bwameth' ){ // // MODULE: Installed directly from nf-core/modules // -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { TRIMGALORE } from '../modules/nf-core/trimgalore/main' -include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' -include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' -include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' -include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { TRIMGALORE } from '../modules/nf-core/trimgalore/main' +include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' +include { QUALIMAP_BAMQC as QUALIMAP_BAMQC2 } from '../modules/nf-core/qualimap/bamqc/main' +include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' +include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' +include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -180,19 +183,7 @@ workflow METHYLSEQ { ch_dedup = BISMARK.out.dedup ch_aligner_mqc = BISMARK.out.mqc } - /* - * Run Mark Duplicates on the BAM file - */ - MARK_DUPLICATES ( - ch_bam, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fai - ) - ch_bam = MARK_DUPLICATES.out.bam - ch_bai = MARK_DUPLICATES.out.bai - ch_markduplicates_flagstat_multiqc = MARK_DUPLICATES.out.flagstat - ch_versions = ch_versions.mix(MARK_DUPLICATES.out.versions) -} + // Aligner: bwameth else if ( params.aligner == 'bwameth' ){ @@ -208,37 +199,7 @@ workflow METHYLSEQ { ch_dedup = BWAMETH.out.dedup ch_aligner_mqc = BWAMETH.out.mqc } - /* - * Run Mark Duplicates on the BAM file - */ - - MARK_DUPLICATES ( - ch_bam, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fai - ) - - ch_bam = MARK_DUPLICATES.out.bam - ch_bai = MARK_DUPLICATES.out.bai - ch_markduplicates_flagstat_multiqc = 
MARK_DUPLICATES.out.flagstat - ch_versions = ch_versions.mix(MARK_DUPLICATES.out.versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_markduplicates_flagstat_multiqc) - -} - -/* - * SUBWORKFLOW: Collect hybrid selection metrics with Picard - */ -def minMQ = params.min_mapping_quality ?: 20 -MARKDUPLICATES(reads) -PICARD_COLLECTHSMETRICS( - reference_genome: PREPARE_GENOME.out.reference_genome, - intervals: params.intervals ? file(params.intervals, checkIfExists: true) : null, - minimum_mapping_quality: minMQ -) -.set { ch_hsmetrics } - - + /* * MODULE: Qualimap BamQC */ @@ -247,6 +208,22 @@ PICARD_COLLECTHSMETRICS( [] ) versions = versions.mix(QUALIMAP_BAMQC.out.versions.first()) + + // MODULE: PICARD_MARKDUPLICATES + + PICARD_MARKDUPLICATES ( + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fasta_index + ch_bam + ) + versions = versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + ch_dedupm = PICARD_MARKDUPLICATES.out.bam + + QUALIMAP_BAMQC2 ( + ch_dedupm, + [] + ) + versions = versions.mix(QUALIMAP_BAMQC2.out.versions.first()) /* * MODULE: Run Preseq @@ -255,6 +232,25 @@ PICARD_COLLECTHSMETRICS( ch_bam ) versions = versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) + + /* + * MODULE: HS METRICS. + */ + if (targetIntervalProvided){ + PICARD_COLLECTHSMETRICS ( + ch_bam, + file(params.target_interval) + ) + } else { + process warning { + input: + script: + ''' + echo "Target interval BED file not provided. HS metrics module will be skipped." + ''' + } + ch_bam // Pass the BAM channel directly to the next step without running the HS metrics module +} CUSTOM_DUMPSOFTWAREVERSIONS ( versions.unique().collectFile(name: 'collated_versions.yml') @@ -274,7 +270,8 @@ PICARD_COLLECTHSMETRICS( ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC.out.results.collect{ it[1] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC.out.results.collect{ it[0] }.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC2.out.results.collect{ it[0] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.log.collect{ it[1] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_aligner_mqc.ifEmpty([])) if (!params.skip_trimming) { From f628c55746e52bd805ad68de7784d540ab9d3587 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Fri, 26 May 2023 20:03:59 +0200 Subject: [PATCH 08/34] Update modules.json The new modules were included. 
' collecthsmetrics', and 'createsequenceddictionary' --- modules.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/modules.json b/modules.json index 634ae56d..6016ab64 100644 --- a/modules.json +++ b/modules.json @@ -80,6 +80,16 @@ "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", "installed_by": ["modules"] }, + "picard/collecthsmetrics": { + "branch": "master", + "git_sha": "0ce3ab0ac301f160225b22254fa238478b4389f2", + "installed_by": ["modules"] + }, + "picard/createsequencedictionary": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "picard/markduplicates": { "branch": "master", "git_sha": "eca65aa4a5e2e192ac44d6962c8f9260f314ffb8", From 5cd09c41eb2a9c653ba90a803dd3a513dd93f68f Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Fri, 26 May 2023 20:26:32 +0200 Subject: [PATCH 09/34] Update nextflow.config Parameter to execute the module hsmetrics in case that bed file is provided --- nextflow.config | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 01bcbe46..76eaf702 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,8 +22,10 @@ params { multiqc_logo = null max_multiqc_email_size = '25.MB' multiqc_methods_description = null - - + + // HS metrics + params.target_intervals = null + // Intermediate files save_reference = false save_align_intermeds = false From c71c7ff74ef0545f2945860f040a4fd61d3adffe Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 13:30:02 +0200 Subject: [PATCH 10/34] Update methylseq.nf Change HS metrics module, adition of the new parameters (Dictionary and the bai input) --- workflows/methylseq.nf | 74 ++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 4cbdd227..93ccdcee 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -1,4 +1,3 @@ -params.target_interval = null /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE INPUTS @@ -26,9 +25,6 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -// Check if target interval BED file is provided -def targetIntervalProvided = params.target_interval ? 
true : false - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -78,7 +74,6 @@ include { TRIMGALORE } from '../modules/nf-core/ include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' include { QUALIMAP_BAMQC as QUALIMAP_BAMQC2 } from '../modules/nf-core/qualimap/bamqc/main' include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' -include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' /* @@ -199,7 +194,41 @@ workflow METHYLSEQ { ch_dedup = BWAMETH.out.dedup ch_aligner_mqc = BWAMETH.out.mqc } - + + /* + * MODULE: Index Alignments + */ + SAMTOOLS_INDEX_ALIGNMENTS (ch_bam) + + ch_bai = SAMTOOLS_INDEX_ALIGNMENTS.out.bai + versions = versions.mix(SAMTOOLS_INDEX_ALIGNMENTS.out.bai) + + /* + * MODULE: Picard CreateSequenceDictionary + */ + PICARD_CREATESEQUENCEDICTIONARY ( + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fasta_index + ) + ch_dict = PICARD_CREATESEQUENCEDICTIONARY.out.dict + versions = versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + + /* + * MODULE: HS METRICS. + */ + if (params.target_interval){ + + ch_bam_bai.value([[],ch_bam,ch_bai,file(params.target_intervals)]) + ch_fasta.value([[],PREPARE_GENOME.out.fasta,]) + ch_fai.value([[],PREPARE_GENOME.out.fasta_index]) + ch_dic.value([[],ch_dict]) + + PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dict) + } else { exit 2, "Target interval BED file not provided. HS metrics module will be skipped." } + + ch_bam // Pass the BAM channel directly to the next step without running the HS metrics module + + /* * MODULE: Qualimap BamQC */ @@ -209,21 +238,7 @@ workflow METHYLSEQ { ) versions = versions.mix(QUALIMAP_BAMQC.out.versions.first()) - // MODULE: PICARD_MARKDUPLICATES - - PICARD_MARKDUPLICATES ( - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fasta_index - ch_bam - ) - versions = versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) - ch_dedupm = PICARD_MARKDUPLICATES.out.bam - - QUALIMAP_BAMQC2 ( - ch_dedupm, - [] - ) - versions = versions.mix(QUALIMAP_BAMQC2.out.versions.first()) + /* * MODULE: Run Preseq @@ -233,23 +248,6 @@ workflow METHYLSEQ { ) versions = versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) - /* - * MODULE: HS METRICS. - */ - if (targetIntervalProvided){ - PICARD_COLLECTHSMETRICS ( - ch_bam, - file(params.target_interval) - ) - } else { - process warning { - input: - script: - ''' - echo "Target interval BED file not provided. HS metrics module will be skipped." 
- ''' - } - ch_bam // Pass the BAM channel directly to the next step without running the HS metrics module } CUSTOM_DUMPSOFTWAREVERSIONS ( From 6a0d1a49b77f811e6688d29c4bb2b8e89f6a306e Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 13:31:31 +0200 Subject: [PATCH 11/34] Update methylseq.nf Include the modules --- workflows/methylseq.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 93ccdcee..99b4e013 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -75,6 +75,8 @@ include { QUALIMAP_BAMQC } from '../modules/nf-core/ include { QUALIMAP_BAMQC as QUALIMAP_BAMQC2 } from '../modules/nf-core/qualimap/bamqc/main' include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../modules/nf-core/picard/createsequencedictionary/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../../modules/nf-core/samtools/index/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 0b922108f0b21743a8ba793171e8d8a7a94e16a5 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:31:21 +0200 Subject: [PATCH 12/34] hs metrics module Main process of module --- .../nf-core /picard /collecthsmetrics/main.nf | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 modules/nf-core /picard /collecthsmetrics/main.nf diff --git a/modules/nf-core /picard /collecthsmetrics/main.nf b/modules/nf-core /picard /collecthsmetrics/main.nf new file mode 100644 index 00000000..d721bc6f --- /dev/null +++ b/modules/nf-core /picard /collecthsmetrics/main.nf @@ -0,0 +1,82 @@ +process PICARD_COLLECTHSMETRICS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*_metrics") , emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + def bait_interval_list = bait_intervals + def bait_intervallist_cmd = "" + if (bait_intervals =~ /.(bed|bed.gz)$/){ + bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
+ } + + def target_interval_list = target_intervals + def target_intervallist_cmd = "" + if (target_intervals =~ /.(bed|bed.gz)$/){ + target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." + } + + + """ + + $bait_intervallist_cmd + $target_intervallist_cmd + + picard \\ + -Xmx${avail_mem}M \\ + CollectHsMetrics \\ + $args \\ + $reference \\ + --BAIT_INTERVALS $bait_interval_list \\ + --TARGET_INTERVALS $target_interval_list \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectHsMetrics.coverage_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} From d220a24875642d4193e21b6365b7c2586d77cec8 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:32:54 +0200 Subject: [PATCH 13/34] Create main.nf module hs metrics --- .../nf-core/picard/collecths metrics/main.nf | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 modules/nf-core/picard/collecths metrics/main.nf diff --git a/modules/nf-core/picard/collecths metrics/main.nf b/modules/nf-core/picard/collecths metrics/main.nf new file mode 100644 index 00000000..d721bc6f --- /dev/null +++ b/modules/nf-core/picard/collecths metrics/main.nf @@ -0,0 +1,82 @@ +process PICARD_COLLECTHSMETRICS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*_metrics") , emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + def bait_interval_list = bait_intervals + def bait_intervallist_cmd = "" + if (bait_intervals =~ /.(bed|bed.gz)$/){ + bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
+ } + + def target_interval_list = target_intervals + def target_intervallist_cmd = "" + if (target_intervals =~ /.(bed|bed.gz)$/){ + target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." + } + + + """ + + $bait_intervallist_cmd + $target_intervallist_cmd + + picard \\ + -Xmx${avail_mem}M \\ + CollectHsMetrics \\ + $args \\ + $reference \\ + --BAIT_INTERVALS $bait_interval_list \\ + --TARGET_INTERVALS $target_interval_list \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectHsMetrics.coverage_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} From 57ce5f1b5bb7a72baeb73c367c84d6c47d5e8ffc Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:33:53 +0200 Subject: [PATCH 14/34] Delete modules/nf-core /picard /collecthsmetrics directory --- .../nf-core /picard /collecthsmetrics/main.nf | 82 ------------------- 1 file changed, 82 deletions(-) delete mode 100644 modules/nf-core /picard /collecthsmetrics/main.nf diff --git a/modules/nf-core /picard /collecthsmetrics/main.nf b/modules/nf-core /picard /collecthsmetrics/main.nf deleted file mode 100644 index d721bc6f..00000000 --- a/modules/nf-core /picard /collecthsmetrics/main.nf +++ /dev/null @@ -1,82 +0,0 @@ -process PICARD_COLLECTHSMETRICS { - tag "$meta.id" - label 'process_single' - - conda "bioconda::picard=3.0.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" - - input: - tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(dict) - - output: - tuple val(meta), path("*_metrics") , emit: metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - - def avail_mem = 3072 - if (!task.memory) { - log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - - def bait_interval_list = bait_intervals - def bait_intervallist_cmd = "" - if (bait_intervals =~ /.(bed|bed.gz)$/){ - bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
- } - - def target_interval_list = target_intervals - def target_intervallist_cmd = "" - if (target_intervals =~ /.(bed|bed.gz)$/){ - target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." - } - - - """ - - $bait_intervallist_cmd - $target_intervallist_cmd - - picard \\ - -Xmx${avail_mem}M \\ - CollectHsMetrics \\ - $args \\ - $reference \\ - --BAIT_INTERVALS $bait_interval_list \\ - --TARGET_INTERVALS $target_interval_list \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.CollectHsMetrics.coverage_metrics - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ -} From 8c34e8688a2bd29311d9eeb5dddf7a4f934b5d44 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:37:10 +0200 Subject: [PATCH 15/34] Create meta.yml meta file --- .../nf-core/picard/collecthsmetrics/meta.yml | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 modules/nf-core/picard/collecthsmetrics/meta.yml diff --git a/modules/nf-core/picard/collecthsmetrics/meta.yml b/modules/nf-core/picard/collecthsmetrics/meta.yml new file mode 100644 index 00000000..fecad0e5 --- /dev/null +++ b/modules/nf-core/picard/collecthsmetrics/meta.yml @@ -0,0 +1,89 @@ +name: picard_collecthsmetrics +description: Collects hybrid-selection (HS) metrics for a SAM or BAM file. +keywords: + - alignment + - metrics + - statistics + - insert + - hybrid-selection + - quality + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + tool_dev_url: https://github.com/broadinstitute/picard/ + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: An aligned BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Optional aligned BAM/CRAM/SAM file index + pattern: "*.{bai,crai,sai}" + - bait_intervals: + type: file + description: An interval file that contains the locations of the baits used. + pattern: "*.{interval_list,bed,bed.gz}" + - target_intervals: + type: file + description: An interval file that contains the locations of the targets. + pattern: "*.{interval_list,bed,bed.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: | + A reference file to calculate dropout metrics measuring reduced representation of reads. + Optional input. + pattern: "*.{fa,fasta,fna}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of FASTA file. Only needed when fasta is supplied. 
+ pattern: "*.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Sequence dictionary of FASTA file. Only needed when bed interval lists are supplied. + pattern: "*.dict" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - metrics: + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + +authors: + - "@projectoriented" + - "@matthdsm" From adb0e2c5bc1355c4bb2d146841410eb6f195364b Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:37:57 +0200 Subject: [PATCH 16/34] Delete modules/nf-core/picard/collecths metrics directory --- .../nf-core/picard/collecths metrics/main.nf | 82 ------------------- 1 file changed, 82 deletions(-) delete mode 100644 modules/nf-core/picard/collecths metrics/main.nf diff --git a/modules/nf-core/picard/collecths metrics/main.nf b/modules/nf-core/picard/collecths metrics/main.nf deleted file mode 100644 index d721bc6f..00000000 --- a/modules/nf-core/picard/collecths metrics/main.nf +++ /dev/null @@ -1,82 +0,0 @@ -process PICARD_COLLECTHSMETRICS { - tag "$meta.id" - label 'process_single' - - conda "bioconda::picard=3.0.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" - - input: - tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(dict) - - output: - tuple val(meta), path("*_metrics") , emit: metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - - def avail_mem = 3072 - if (!task.memory) { - log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - - def bait_interval_list = bait_intervals - def bait_intervallist_cmd = "" - if (bait_intervals =~ /.(bed|bed.gz)$/){ - bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." - } - - def target_interval_list = target_intervals - def target_intervallist_cmd = "" - if (target_intervals =~ /.(bed|bed.gz)$/){ - target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") - target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
- } - - - """ - - $bait_intervallist_cmd - $target_intervallist_cmd - - picard \\ - -Xmx${avail_mem}M \\ - CollectHsMetrics \\ - $args \\ - $reference \\ - --BAIT_INTERVALS $bait_interval_list \\ - --TARGET_INTERVALS $target_interval_list \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.CollectHsMetrics.coverage_metrics - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ -} From 6c18bf5e62520231f2673b9be3641b2c9adf4319 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:38:53 +0200 Subject: [PATCH 17/34] Create main.nf collecths metrics module --- .../nf-core/picard/collecthsmetrics/main.nf | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 modules/nf-core/picard/collecthsmetrics/main.nf diff --git a/modules/nf-core/picard/collecthsmetrics/main.nf b/modules/nf-core/picard/collecthsmetrics/main.nf new file mode 100644 index 00000000..d721bc6f --- /dev/null +++ b/modules/nf-core/picard/collecthsmetrics/main.nf @@ -0,0 +1,82 @@ +process PICARD_COLLECTHSMETRICS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*_metrics") , emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectHsMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + def bait_interval_list = bait_intervals + def bait_intervallist_cmd = "" + if (bait_intervals =~ /.(bed|bed.gz)$/){ + bait_interval_list = bait_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + bait_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${bait_intervals} --OUTPUT ${bait_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." + } + + def target_interval_list = target_intervals + def target_intervallist_cmd = "" + if (target_intervals =~ /.(bed|bed.gz)$/){ + target_interval_list = target_intervals.toString().replaceAll(/.(bed|bed.gz)$/, ".interval_list") + target_intervallist_cmd = "picard -Xmx${avail_mem}M BedToIntervalList --INPUT ${target_intervals} --OUTPUT ${target_interval_list} --SEQUENCE_DICTIONARY ${dict} --TMP_DIR ." 
+ } + + + """ + + $bait_intervallist_cmd + $target_intervallist_cmd + + picard \\ + -Xmx${avail_mem}M \\ + CollectHsMetrics \\ + $args \\ + $reference \\ + --BAIT_INTERVALS $bait_interval_list \\ + --TARGET_INTERVALS $target_interval_list \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectHsMetrics.coverage_metrics + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectHsMetrics.coverage_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectHsMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} From d139ddc0ab6695e2189f03d7e0c8c439eb004d84 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:43:47 +0200 Subject: [PATCH 18/34] Create main.nf --- .../picard/createsequencedictionary/main.nf | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 modules/nf-core/picard/createsequencedictionary/main.nf diff --git a/modules/nf-core/picard/createsequencedictionary/main.nf b/modules/nf-core/picard/createsequencedictionary/main.nf new file mode 100644 index 00000000..d07cc673 --- /dev/null +++ b/modules/nf-core/picard/createsequencedictionary/main.nf @@ -0,0 +1,42 @@ +process PICARD_CREATESEQUENCEDICTIONARY { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dict"), emit: reference_dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CreateSequenceDictionary] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CreateSequenceDictionary \\ + $args \\ + --REFERENCE $fasta \\ + --OUTPUT ${prefix}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CreateSequenceDictionary --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} From d53b7dd88f800f7c03f50d6cf08dde81e72ac569 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:44:46 +0200 Subject: [PATCH 19/34] Create meta.yml --- .../picard/createsequencedictionary/meta.yml | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 modules/nf-core/picard/createsequencedictionary/meta.yml diff --git a/modules/nf-core/picard/createsequencedictionary/meta.yml b/modules/nf-core/picard/createsequencedictionary/meta.yml new file mode 100644 index 00000000..3e04159d --- /dev/null +++ b/modules/nf-core/picard/createsequencedictionary/meta.yml @@ -0,0 +1,45 @@ +name: picard_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence. 
+keywords: + - sequence + - dictionary + - picard +tools: + - picard: + description: | + Creates a sequence dictionary file (with ".dict" extension) from a reference sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a header but no SAMRecords, and the header contains only sequence records. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036712531-CreateSequenceDictionary-Picard- + tool_dev_url: https://github.com/broadinstitute/picard + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dict: + type: file + description: picard dictionary file + pattern: "*.{dict}" + +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" From c5d88699a7df69bc034884a948a21633bded19d6 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 15:53:16 +0200 Subject: [PATCH 20/34] Update methylseq.nf --- workflows/methylseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 99b4e013..8f0fa415 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -75,7 +75,7 @@ include { QUALIMAP_BAMQC } from '../modules/nf-core/ include { QUALIMAP_BAMQC as QUALIMAP_BAMQC2 } from '../modules/nf-core/qualimap/bamqc/main' include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../modules/nf-core/picard/createsequencedictionary/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../modules/nf-core/picard/createsequencedictionary/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../../modules/nf-core/samtools/index/main' /* From 1c15794c9939057643a6303c8e8078b15808e48a Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Thu, 1 Jun 2023 16:13:09 +0200 Subject: [PATCH 21/34] Update methylseq.nf Corrections in the code to include the HS metrics into the multiqc module. 
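The fix follows the usual nf-core reporting pattern: start from an empty channel, mix in each tool's report files as they are produced, and finally hand the accumulated channel to MultiQC. A minimal sketch under that assumption (channel and module names as in the diff below; the exact collect/ifEmpty handling may differ):

    ch_metrics       = Channel.empty()
    ch_multiqc_files = Channel.empty()

    // after the module has run, fold its report output into the accumulator
    ch_metrics       = ch_metrics.mix( PICARD_COLLECTHSMETRICS.out.metrics )

    // tolerate the case where the step was skipped and nothing was emitted
    ch_multiqc_files = ch_multiqc_files.mix( ch_metrics.ifEmpty([]) )

Other report channels in this workflow strip the per-sample meta map first (collect{ it[1] }) before mixing; whether that is needed for the HS metrics channel depends on what the MultiQC module accepts.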
--- workflows/methylseq.nf | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 8f0fa415..959925dc 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -86,6 +86,7 @@ include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../../modules/nf // Info required for completion email and summary def multiqc_report = [] +ch_metrics = Channel.empty() workflow METHYLSEQ { @@ -220,12 +221,14 @@ workflow METHYLSEQ { */ if (params.target_interval){ - ch_bam_bai.value([[],ch_bam,ch_bai,file(params.target_intervals)]) - ch_fasta.value([[],PREPARE_GENOME.out.fasta,]) - ch_fai.value([[],PREPARE_GENOME.out.fasta_index]) - ch_dic.value([[],ch_dict]) - + ch_bam_bai = Channel.value([[],ch_bam,ch_bai,file(params.target_intervals)]) + ch_fasta = Channel.value([[],PREPARE_GENOME.out.fasta,]) + ch_fai = Channel.value([[],PREPARE_GENOME.out.fasta_index]) + ch_dic = Channel.value([[],ch_dict]) + PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dict) + versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions.first()) + ch_metrics = ch_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics) } else { exit 2, "Target interval BED file not provided. HS metrics module will be skipped." } ch_bam // Pass the BAM channel directly to the next step without running the HS metrics module @@ -274,6 +277,8 @@ workflow METHYLSEQ { ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC2.out.results.collect{ it[0] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.log.collect{ it[1] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_aligner_mqc.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_metrics.ifEmpty([])) + if (!params.skip_trimming) { ch_multiqc_files = ch_multiqc_files.mix(TRIMGALORE.out.log.collect{ it[1] }) } From 56126c10f8fd13854f0b77550d1697bfad43dcb8 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:07:49 +0200 Subject: [PATCH 22/34] Update methylseq.nf Input of create sequence dictionary module is now corrected --- workflows/methylseq.nf | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 959925dc..85d60db1 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -18,7 +18,6 @@ def checkPathParamList = [ params.bwa_meth_index, params.bismark_index, params.known_splices, - params.target_interval ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -209,29 +208,25 @@ workflow METHYLSEQ { /* * MODULE: Picard CreateSequenceDictionary */ - PICARD_CREATESEQUENCEDICTIONARY ( - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fasta_index - ) + ch_fasta = Channel.value([[:],PREPARE_GENOME.out.fasta]) + PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) ch_dict = PICARD_CREATESEQUENCEDICTIONARY.out.dict versions = versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) /* * MODULE: HS METRICS. 
*/ - if (params.target_interval){ - - ch_bam_bai = Channel.value([[],ch_bam,ch_bai,file(params.target_intervals)]) - ch_fasta = Channel.value([[],PREPARE_GENOME.out.fasta,]) - ch_fai = Channel.value([[],PREPARE_GENOME.out.fasta_index]) - ch_dic = Channel.value([[],ch_dict]) - - PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dict) - versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions.first()) - ch_metrics = ch_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics) - } else { exit 2, "Target interval BED file not provided. HS metrics module will be skipped." } + + ch_bait_intervals = params.bait_intervals ? Channel.value(file(params.bait_intervals, checkIfExists: true)) : Channel.value([]) + ch_target_intervals = params.target_intervals ? Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) + ch_bam_bai = Channel.value([[:],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) + ch_fai = Channel.value([[:],PREPARE_GENOME.out.fasta_index]) + ch_dic = Channel.value([[:],ch_dict]) + + PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dic) - ch_bam // Pass the BAM channel directly to the next step without running the HS metrics module + versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions) + ch_metrics = ch_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics) /* From 34414474cb27eceb540cf63464948a929111fe67 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:31:36 +0200 Subject: [PATCH 23/34] Update nextflow_schema.json Addition of bait and target intervals configuration --- nextflow_schema.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index fe07cf05..4227483e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -127,6 +127,22 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "bait_intervals": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "description": "Path to BED file containing bait intervals." + "pattern": "^\\S+\\.bed$", + "fa_icon": "fas fa-procedures" + }, + "target_intervals": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed$", + "description": "Path to BED file containing target intervals.", + "fa_icon": "fas fa-procedures" } } }, From 914d33a26b9f26271c8021f6ee918314d528d29a Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Mon, 5 Jun 2023 15:05:10 +0200 Subject: [PATCH 24/34] Update nextflow.config bait and target intervals are now set to false --- nextflow.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 76eaf702..487cdd9a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,7 +24,8 @@ params { multiqc_methods_description = null // HS metrics - params.target_intervals = null + target_intervals = null + bait_intervals = null // Intermediate files save_reference = false From 4c1477bac28ed45431be304088cd7f30207872b3 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 7 Jun 2023 11:47:57 +0200 Subject: [PATCH 25/34] Update nextflow_schema.json Previously, the parameters were added manually, now the file is updated using nf-core schema build tool. 
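For context, both new options can stay hidden with a null default because the workflow turns each of them into either a real file or an empty placeholder before it reaches Picard, along the lines of this sketch (parameter and channel names as used in the workflow; checkIfExists makes Nextflow fail early on a bad path):

    ch_bait_intervals   = params.bait_intervals   ? Channel.value(file(params.bait_intervals,   checkIfExists: true)) : Channel.value([])
    ch_target_intervals = params.target_intervals ? Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([])

The "default": "None" strings in the regenerated JSON below appear to be how `nf-core schema build` renders those null defaults from nextflow.config.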
--- nextflow_schema.json | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 4227483e..13fc6d1b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -127,22 +127,6 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." - }, - "bait_intervals": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "description": "Path to BED file containing bait intervals." - "pattern": "^\\S+\\.bed$", - "fa_icon": "fas fa-procedures" - }, - "target_intervals": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.bed$", - "description": "Path to BED file containing target intervals.", - "fa_icon": "fas fa-procedures" } } }, @@ -653,5 +637,21 @@ { "$ref": "#/definitions/institutional_config_options" } - ] + ], + "properties": { + "target_intervals": { + "type": "string", + "default": "None", + "hidden": true, + "description": "Path to BED file with Target intervals" + }, + "bait_intervals": { + "type": "string", + "default": "None", + "hidden": true, + "description": "Path to Bed file with Bait intervals" + } + } } + + From 6b9d06284c425b4d7260d15d13ea9746ed4e5d93 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 7 Jun 2023 11:55:37 +0200 Subject: [PATCH 26/34] Update methylseq.nf --- workflows/methylseq.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 85d60db1..ba458104 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -208,7 +208,7 @@ workflow METHYLSEQ { /* * MODULE: Picard CreateSequenceDictionary */ - ch_fasta = Channel.value([[:],PREPARE_GENOME.out.fasta]) + ch_fasta = Channel.value([[],PREPARE_GENOME.out.fasta]) PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) ch_dict = PICARD_CREATESEQUENCEDICTIONARY.out.dict versions = versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) @@ -219,9 +219,10 @@ workflow METHYLSEQ { ch_bait_intervals = params.bait_intervals ? Channel.value(file(params.bait_intervals, checkIfExists: true)) : Channel.value([]) ch_target_intervals = params.target_intervals ? Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) - ch_bam_bai = Channel.value([[:],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) - ch_fai = Channel.value([[:],PREPARE_GENOME.out.fasta_index]) - ch_dic = Channel.value([[:],ch_dict]) + + ch_bam_bai = Channel.value([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) + ch_fai = Channel.value([[],PREPARE_GENOME.out.fasta_index]) + ch_dic = Channel.value([[],ch_dict]) PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dic) From ca65020f0b816eec9e1efc46684c75d9c50eb5f6 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 7 Jun 2023 13:00:45 +0200 Subject: [PATCH 27/34] Update methylseq.nf Correct a mismatched bracket. The 'QUALIMAP_BAMQC as QUALIMAP_BAMQC2' include was deleted, and the MultiQC channel from this module was also deleted.
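For readers less familiar with DSL2 module aliasing, the construct removed in the diff below is simply the mechanism that lets one module be imported twice under different names; once the second Qualimap invocation was gone, the alias had no caller left, so both the include and its MultiQC channel could be dropped. A minimal sketch of the pattern (both lines are taken from the diff that follows):

    // aliased include: same module, second name, only needed while it was called twice
    include { QUALIMAP_BAMQC as QUALIMAP_BAMQC2 } from '../modules/nf-core/qualimap/bamqc/main'

    // plain include kept in the workflow for the single remaining call
    include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main'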
--- workflows/methylseq.nf | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index ba458104..8be8aa3e 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -71,11 +71,10 @@ include { MULTIQC } from '../modules/nf-core/ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { TRIMGALORE } from '../modules/nf-core/trimgalore/main' include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' -include { QUALIMAP_BAMQC as QUALIMAP_BAMQC2 } from '../modules/nf-core/qualimap/bamqc/main' include { PRESEQ_LCEXTRAP } from '../modules/nf-core/preseq/lcextrap/main' -include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard_collecthsmetrics/main' +include { PICARD_COLLECTHSMETRICS } from '../modules/nf-core/picard/collecthsmetrics/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../modules/nf-core/picard/createsequencedictionary/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../modules/nf-core/samtools/index/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -248,8 +247,7 @@ workflow METHYLSEQ { ch_bam ) versions = versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) - -} + CUSTOM_DUMPSOFTWAREVERSIONS ( versions.unique().collectFile(name: 'collated_versions.yml') @@ -270,7 +268,6 @@ workflow METHYLSEQ { ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC.out.results.collect{ it[0] }.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QUALIMAP_BAMQC2.out.results.collect{ it[0] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.log.collect{ it[1] }.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_aligner_mqc.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_metrics.ifEmpty([])) From c6b95031227350d2232aa86abbdbfdd9cdd3f4ba Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 7 Jun 2023 13:39:46 +0200 Subject: [PATCH 28/34] Update methylseq.nf The dictionary is now created directly as a value channel --- workflows/methylseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 8be8aa3e..2b095877 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -209,7 +209,7 @@ workflow METHYLSEQ { */ ch_fasta = Channel.value([[],PREPARE_GENOME.out.fasta]) PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) - ch_dict = PICARD_CREATESEQUENCEDICTIONARY.out.dict + ch_dict = Channel.value([[],PICARD_CREATESEQUENCEDICTIONARY.out.dict]) versions = versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) /* @@ -221,7 +221,7 @@ workflow METHYLSEQ { ch_bait_intervals = params.bait_intervals ? Channel.value(file(params.bait_intervals, checkIfExists: true)) : Channel.value([]) ch_target_intervals = params.target_intervals ? Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) ch_bam_bai = Channel.value([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) ch_fai = Channel.value([[],PREPARE_GENOME.out.fasta_index]) - ch_dic = Channel.value([[],ch_dict]) + PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dic) From ecd6078a82f0c33e4a767e349f631a3cd60db87f Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 7 Jun 2023 13:49:29 +0200 Subject: [PATCH 29/34] Update methylseq.nf The Channel.of method instead of Channel.value was
used to declare the channels --- workflows/methylseq.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 2b095877..9c391238 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -207,9 +207,9 @@ workflow METHYLSEQ { /* * MODULE: Picard CreateSequenceDictionary */ - ch_fasta = Channel.value([[],PREPARE_GENOME.out.fasta]) + ch_fasta = Channel.of([[],PREPARE_GENOME.out.fasta]) PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) - ch_dict = Channel.value([[],PICARD_CREATESEQUENCEDICTIONARY.out.dict]) + ch_dict = Channel.of([[],PICARD_CREATESEQUENCEDICTIONARY.out.dict]) versions = versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) /* @@ -219,8 +219,8 @@ workflow METHYLSEQ { ch_bait_intervals = params.bait_intervals ? Channel.value(file(params.bait_intervals, checkIfExists: true)) : Channel.value([]) ch_target_intervals = params.target_intervals ? Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) - ch_bam_bai = Channel.value([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) - ch_fai = Channel.value([[],PREPARE_GENOME.out.fasta_index]) + ch_bam_bai = Channel.of([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) + ch_fai = Channel.of([[],PREPARE_GENOME.out.fasta_index]) PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dic) From 987cdeca749746d20473ae57535cbde2f4a63abb Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 7 Jun 2023 15:55:37 +0200 Subject: [PATCH 30/34] Update methylseq.nf Modification in the dictionary channel: instead of being declared with the Channel.of method, it is now collected. --- workflows/methylseq.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 9c391238..b03199ea 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -209,7 +209,7 @@ workflow METHYLSEQ { */ ch_fasta = Channel.of([[],PREPARE_GENOME.out.fasta]) PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) - ch_dict = Channel.of([[],PICARD_CREATESEQUENCEDICTIONARY.out.dict]) + ch_dict = PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict.collect() versions = versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) /* @@ -222,8 +222,7 @@ workflow METHYLSEQ { ch_bam_bai = Channel.of([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) ch_fai = Channel.of([[],PREPARE_GENOME.out.fasta_index]) - - PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dic) + PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dict) versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions) ch_metrics = ch_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics) From 4bec010984e71c809723e19bbd71138cabe07c00 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Fri, 9 Jun 2023 11:07:47 +0200 Subject: [PATCH 31/34] Update methylseq.nf New channels were created to map the correct elements for the HS metrics module. --- workflows/methylseq.nf | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index b03199ea..884c554f 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -219,11 +219,15 @@ workflow METHYLSEQ { ch_bait_intervals = params.bait_intervals ? Channel.value(file(params.bait_intervals, checkIfExists: true)) : Channel.value([]) ch_target_intervals = params.target_intervals ?
Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) - ch_bam_bai = Channel.of([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) + ch_bam_bai = Channel.combine([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) + + ch_bam_bai_T = ch_bam_bai.map { meta, bam, bai, bait_intervals, target_intervals -> [meta, bam, bai, bait_intervals, target_intervals]} ch_fai = Channel.of([[],PREPARE_GENOME.out.fasta_index]) - + ch_meta_fasta = ch_fasta.map {meta, fasta -> [meta,fasta]}.collect() + ch_meta_fai = ch_fai.map {meta, fai -> [meta,fai]}.collect() + + PICARD_COLLECTHSMETRICS (ch_bam_bai_T,ch_meta_fasta,ch_meta_fai,ch_dict) versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions) ch_metrics = ch_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics) From 3cc9e877af3d7bf524e540ede7ae915bc154a529 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Tue, 13 Jun 2023 00:17:50 +0200 Subject: [PATCH 32/34] Update methylseq.nf Delete redundant code; the map function wasn't really useful. --- workflows/methylseq.nf | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 884c554f..407029a7 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -220,18 +220,12 @@ workflow METHYLSEQ { ch_target_intervals = params.target_intervals ? Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) ch_bam_bai = Channel.combine([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) - - ch_bam_bai_T = ch_bam_bai.map { meta, bam, bai, bait_intervals, target_intervals -> - [meta, bam, bai, bait_intervals, target_intervals]} ch_fai = Channel.of([[],PREPARE_GENOME.out.fasta_index]) - ch_meta_fasta = ch_fasta.map {meta, fasta -> [meta,fasta]}.collect() - ch_meta_fai = ch_fai.map {meta, fai -> [meta,fai]}.collect() - - PICARD_COLLECTHSMETRICS (ch_bam_bai_T,ch_meta_fasta,ch_meta_fai,ch_dict) + PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dict) versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions) ch_metrics = ch_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics) - /* * MODULE: Qualimap BamQC */ From 0a71f8bdce19a5a7622d3930fa5a9ff34ee51397 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Tue, 13 Jun 2023 10:45:34 +0200 Subject: [PATCH 33/34] Update methylseq.nf
Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) - ch_bam_bai = Channel.combine([[],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) - ch_fai = Channel.of([[],PREPARE_GENOME.out.fasta_index]) + ch_bam_bai = Channel.combine([[:],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) + ch_fai = Channel.of([[:],PREPARE_GENOME.out.fasta_index]) PICARD_COLLECTHSMETRICS (ch_bam_bai,ch_fasta,ch_fai,ch_dict) versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions) From f8ae01d7363d45727e1eb19e9946605cd96a9be8 Mon Sep 17 00:00:00 2001 From: RogerHw <131660991+RogerHw@users.noreply.github.com> Date: Wed, 14 Jun 2023 18:25:33 +0200 Subject: [PATCH 34/34] Channels were updated. ch_fasta and ch_fai are now value channels built with the map function, and the combine function was used to merge the four channels into one --- workflows/methylseq.nf | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/workflows/methylseq.nf b/workflows/methylseq.nf index 85317231..c52f326f 100644 --- a/workflows/methylseq.nf +++ b/workflows/methylseq.nf @@ -207,7 +207,7 @@ workflow METHYLSEQ { /* * MODULE: Picard CreateSequenceDictionary */ - ch_fasta = Channel.of([[:],PREPARE_GENOME.out.fasta]) + ch_fasta = PREPARE_GENOME.out.fasta.map{ fasta -> [[:],fasta]}.collect() PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) ch_dict = PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict.collect() versions = versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) @@ -219,10 +219,14 @@ workflow METHYLSEQ { ch_bait_intervals = params.bait_intervals ? Channel.value(file(params.bait_intervals, checkIfExists: true)) : Channel.value([]) ch_target_intervals = params.target_intervals ? Channel.value(file(params.target_intervals, checkIfExists: true)) : Channel.value([]) - ch_bam_bai = Channel.combine([[:],ch_bam,ch_bai,ch_bait_intervals,ch_target_intervals]) - ch_fai = Channel.of([[:],PREPARE_GENOME.out.fasta_index]) + ch_fai = PREPARE_GENOME.out.fasta_index.map{ fai -> [[:],fai]}.collect() + ch_bam_bai = ch_bam.combine(ch_bai) + ch_intervals = ch_target_intervals.combine(ch_bait_intervals) + + ch_bam_intervals = ch_bam_bai.combine(ch_intervals) + + PICARD_COLLECTHSMETRICS (ch_bam_intervals,ch_fasta,ch_fai,ch_dict) versions = versions.mix(PICARD_COLLECTHSMETRICS.out.versions) ch_metrics = ch_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics)
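As a closing note on the pattern PATCH 34 settles on: calling .collect() on a channel that emits a single [ meta, file ] tuple yields a value channel, which can be read any number of times, so the same reference tuple is re-served for every incoming sample, while .combine() concatenates the per-sample tuple with the interval files. Below is a minimal, self-contained sketch of those two operators (a toy script with made-up file names such as ref.fa and s1.bam; it illustrates the channel behaviour and is not code from the pipeline). Note also that the meta placeholder is written [:] (an empty Groovy map) rather than [] (an empty list), which is the correction PATCH 33 makes, and that the dictionary channel is built from PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict.collect().

    #!/usr/bin/env nextflow
    nextflow.enable.dsl = 2

    workflow {
        // reference as a reusable value channel of shape [ meta, fasta ], meta kept as an empty map [:]
        ch_fasta = Channel.fromPath('ref.fa').map { fasta -> [ [:], fasta ] }.collect()

        // three per-sample items standing in for the aligner output channel
        ch_samples = Channel.of( [ [id:'s1'], 's1.bam' ], [ [id:'s2'], 's2.bam' ], [ [id:'s3'], 's3.bam' ] )

        // combine appends the same reference tuple to every sample and prints the result
        ch_samples.combine(ch_fasta).view()
    }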