nf-core-test · mirpedrol · Nov 27, 2024 · Oct 2, 2024 · Oct 2, 2024 · Oct 8, 2024
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,17 @@
+*.code-workspace
+*.pyc
+*.pyo
+.*.sw?
+.DS_Store
+.nextflow*
+.nf-test.log
+.nf-test/
+.screenrc
+__pycache__
+output/
+results/
+test.xml
+test_output/
+tests/data/
+work/
+.github/CODEOWNERS-tmp
diff --git a/README.md b/README.md
@@ -1,2 +1,3 @@
 # modules
-A small version of the nf-core/modules used for nf-core/tools testing
+A small version of nf-core/modules, used for nf-core/tools testing of cross-organisational
+subworkflows, i.e. subworkflows that use modules from more than one remote.
diff --git a/modules/nf-core-test/fastqc/main.nf b/modules/nf-core-test/fastqc/main.nf
@@ -0,0 +1,63 @@
+// FASTQC: runs FastQC on the staged read file(s) and emits the HTML reports,
+// the zip archives and a versions.yml recording the FastQC version.
+process FASTQC {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0'}"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.html"), emit: html
+    tuple val(meta), path("*.zip"), emit: zip
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // [old name, new name] pairs: a single file becomes <prefix>.<ext>, several files become <prefix>_<n>.<ext>
+    def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[reads, "${prefix}.${reads.extension}"]] : reads.withIndex().collect { entry, index -> [entry, "${prefix}_${index + 1}.${entry.extension}"] }
+    // Join each pair into "old new" first (spread operator), then join the pairs, so that
+    // the printf below emits exactly one "old new" line per file for the while-read loop
+    def rename_to = old_new_pairs*.join(' ').join(' ')
+    def renamed_files = old_new_pairs.collect { old_name, new_name -> new_name }.join(' ')
+    // Task memory in MB divided by the CPU count; --threads and --memory are both passed to fastqc below
+    // NOTE(review): assumes FastQC applies --memory per thread - confirm against the FastQC documentation
+    def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus
+    // Clamp the value handed to --memory to the range 100..10000
+    def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
+    """
+    printf "%s %s\\n" ${rename_to} | while read old_name new_name; do
+        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+    done
+
+    fastqc \\
+        ${args} \\
+        --threads ${task.cpus} \\
+        --memory ${fastqc_memory} \\
+        ${renamed_files}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+    END_VERSIONS
+    """
+
+    stub:
+    // Stub run: only touches placeholder outputs; the version is still read from the fastqc binary
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.html
+    touch ${prefix}.zip
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core-test/fastqc/meta.yml b/modules/nf-core-test/fastqc/meta.yml
@@ -0,0 +1,57 @@
+name: fastqc
+description: Run FastQC on sequenced reads
+keywords:
+  - quality control
+  - qc
+  - adapters
+  - fastq
+tools:  # software wrapped by this module
+  - fastqc:
+      description: |
+        FastQC gives general quality metrics about your reads.
+        It provides information about the quality score distribution
+        across your reads, the per base sequence content (%A/C/G/T).
+        You get information about adapter contamination and other
+        overrepresented sequences.
+      homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+      documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+      licence: ["GPL-2.0-only"]
+input:  # process inputs
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+output:  # process outputs
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - html:
+      type: file
+      description: FastQC report
+      pattern: "*_{fastqc.html}"  # single-alternative brace group: matches names ending in _fastqc.html
+  - zip:
+      type: file
+      description: FastQC report archive
+      pattern: "*_{fastqc.zip}"  # single-alternative brace group: matches names ending in _fastqc.zip
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@grst"
+  - "@ewels"
+  - "@FelixKrueger"
+maintainers:  # same four handles as authors
+  - "@drpatelh"
+  - "@grst"
+  - "@ewels"
+  - "@FelixKrueger"
diff --git a/modules/nf-core-test/prokka/environment.yml b/modules/nf-core-test/prokka/environment.yml
@@ -0,0 +1,5 @@
+channels:  # conda-forge is listed before bioconda
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::prokka=1.14.6  # exact version pin, channel given explicitly
diff --git a/modules/nf-core-test/prokka/main.nf b/modules/nf-core-test/prokka/main.nf
@@ -0,0 +1,57 @@
+// PROKKA: runs prokka on the staged `fasta` file and emits one channel per
+// result extension, collected from the ${prefix}/ directory, plus versions.yml.
+process PROKKA {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4' :
+        'biocontainers/prokka:1.14.6--pl5321hdfd78af_4' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    path proteins
+    path prodigal_tf
+
+    output:
+    tuple val(meta), path("${prefix}/*.gff"), emit: gff
+    tuple val(meta), path("${prefix}/*.gbk"), emit: gbk
+    tuple val(meta), path("${prefix}/*.fna"), emit: fna
+    tuple val(meta), path("${prefix}/*.faa"), emit: faa
+    tuple val(meta), path("${prefix}/*.ffn"), emit: ffn
+    tuple val(meta), path("${prefix}/*.sqn"), emit: sqn
+    tuple val(meta), path("${prefix}/*.fsa"), emit: fsa
+    tuple val(meta), path("${prefix}/*.tbl"), emit: tbl
+    tuple val(meta), path("${prefix}/*.err"), emit: err
+    tuple val(meta), path("${prefix}/*.log"), emit: log
+    tuple val(meta), path("${prefix}/*.txt"), emit: txt
+    tuple val(meta), path("${prefix}/*.tsv"), emit: tsv
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args   ?: ''
+    // Assigned without `def`: the output: patterns above reference ${prefix}
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    // Optional inputs: the flag is only emitted when the input is non-empty, otherwise an empty string
+    def proteins_opt = proteins ? "--proteins ${proteins[0]}" : ""
+    // Named prodigal_opt so the local does not shadow the `prodigal_tf` input it is built from
+    def prodigal_opt = prodigal_tf ? "--prodigaltf ${prodigal_tf[0]}" : ""
+    """
+    prokka \\
+        $args \\
+        --cpus $task.cpus \\
+        --prefix $prefix \\
+        $proteins_opt \\
+        $prodigal_opt \\
+        $fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        prokka: \$(echo \$(prokka --version 2>&1) | sed 's/^.*prokka //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core-test/prokka/meta.yml b/modules/nf-core-test/prokka/meta.yml
@@ -0,0 +1,90 @@
+name: prokka
+description: Whole genome annotation of small genomes (bacterial, archaeal, viral)
+keywords:
+  - annotation
+  - fasta
+  - prokka
+tools:  # software wrapped by this module
+  - prokka:
+      description: Rapid annotation of prokaryotic genomes
+      homepage: https://github.com/tseemann/prokka
+      doi: "10.1093/bioinformatics/btu153"
+      licence: ["GPL v2"]  # NOTE(review): not an SPDX identifier - confirm against the tool's licence
+input:  # process inputs
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: |
+        FASTA file to be annotated. Has to contain at least a non-empty string dummy value.
+  - proteins:
+      type: file
+      description: FASTA file of trusted proteins to first annotate from (optional)
+  - prodigal_tf:
+      type: file
+      description: Training file to use for Prodigal (optional)
+output:  # process outputs; one entry per result file extension
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - gff:
+      type: file
+      description: annotation in GFF3 format, containing both sequences and annotations
+      pattern: "*.{gff}"
+  - gbk:
+      type: file
+      description: annotation in GenBank format, containing both sequences and annotations
+      pattern: "*.{gbk}"
+  - fna:
+      type: file
+      description: nucleotide FASTA file of the input contig sequences
+      pattern: "*.{fna}"
+  - faa:
+      type: file
+      description: protein FASTA file of the translated CDS sequences
+      pattern: "*.{faa}"
+  - ffn:
+      type: file
+      description: nucleotide FASTA file of all the prediction transcripts (CDS, rRNA, tRNA, tmRNA, misc_RNA)
+      pattern: "*.{ffn}"
+  - sqn:
+      type: file
+      description: an ASN1 format "Sequin" file for submission to Genbank
+      pattern: "*.{sqn}"
+  - fsa:
+      type: file
+      description: nucleotide FASTA file of the input contig sequences, used by "tbl2asn" to create the .sqn file
+      pattern: "*.{fsa}"
+  - tbl:
+      type: file
+      description: feature Table file, used by "tbl2asn" to create the .sqn file
+      pattern: "*.{tbl}"
+  - err:
+      type: file
+      description: unacceptable annotations - the NCBI discrepancy report.
+      pattern: "*.{err}"
+  - log:
+      type: file
+      description: contains all the output that Prokka produced during its run
+      pattern: "*.{log}"
+  - txt:
+      type: file
+      description: statistics relating to the annotated features found
+      pattern: "*.{txt}"
+  - tsv:
+      type: file
+      description: tab-separated file of all features (locus_tag,ftype,len_bp,gene,EC_number,COG,product)
+      pattern: "*.{tsv}"
+authors:
+  - "@rpetit3"
+maintainers:
+  - "@rpetit3"
diff --git a/modules/nf-core-test/wget/main.nf b/modules/nf-core-test/wget/main.nf
@@ -0,0 +1,27 @@
+// WGET: downloads `url` with wget and emits the result as <meta.id>.fna.
+process WGET {
+    label 'process_single'
+
+    // NOTE(review): the conda spec and containers below only name python 3.8.3 -
+    // confirm that `wget` is actually available in those environments.
+    conda "conda-forge::python=3.8.3"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.8.3' :
+        'biocontainers/python:3.8.3' }"
+
+    input:
+    tuple val(meta), val(url)
+
+    output:
+    tuple val(meta), path("${meta.id}.fna"), emit: fasta
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Both arguments are quoted so that shell metacharacters in the URL (e.g. `&`, `;`)
+    // or whitespace in meta.id are not split or interpreted by the shell.
+    """
+    wget --output-document "${meta.id}.fna" "$url"
+    """
+}