Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cleanup steps to Metagenomics db build tools (Kaiju, KrakenUniq) #7317

Merged
merged 16 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions modules/nf-core/kaiju/mkfmi/main.nf
Original file line number Diff line number Diff line change
@@ -1,33 +1,39 @@
process KAIJU_MKFMI {
tag "$meta.id"
tag "${meta.id}"
label 'process_high'

conda "bioconda::kaiju=1.10.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/kaiju:1.10.0--h43eeafb_0':
'biocontainers/kaiju:1.10.0--h43eeafb_0' }"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://depot.galaxyproject.org/singularity/kaiju:1.10.0--h43eeafb_0'
: 'biocontainers/kaiju:1.10.0--h43eeafb_0'}"

input:
tuple val(meta), path(fasta)
val keep_intermediate

output:
tuple val(meta), path("*.fmi"), emit: fmi
path "versions.yml" , emit: versions
tuple val(meta), path("*.bwt"), optional: true, emit: bwt
tuple val(meta), path("*.sa"), optional: true, emit: sa
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def run_cleanup = keep_intermediate ? "" : "rm -f *.{bwt,sa}"
"""
kaiju-mkbwt \\
$args \\
-n $task.cpus \\
${args} \\
-n ${task.cpus} \\
-o ${prefix} \\
${fasta}
kaiju-mkfmi ${prefix}

${run_cleanup}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
Expand All @@ -37,8 +43,13 @@ process KAIJU_MKFMI {
stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def run_cleanup = keep_intermediate ? "" : "rm -f *.{bwt,sa}"
"""
touch ${prefix}.fmi
touch ${prefix}.bwt
touch ${prefix}.sa

${run_cleanup}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
25 changes: 25 additions & 0 deletions modules/nf-core/kaiju/mkfmi/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ input:
type: file
description: Uncompressed Protein FASTA file (mandatory)
pattern: "*.{fa,faa,fasta}"
- - keep_intermediate:
type: boolean
description: "Keep intermediate files"
pattern: true|false
output:
- fmi:
- meta:
Expand All @@ -37,10 +41,31 @@ output:
type: file
description: Kaiju FM-index file
pattern: "*.{fmi}"
- bwt:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- "*.bwt":
type: file
description: Kaiju intermediate BWT index file (not needed for classification)
pattern: "*.{bwt}"
- sa:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- "*.sa":
type: file
description: Kaiju intermediate suffix array file (not needed for classification)
pattern: "*.{sa}"
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@alxndrdiaz"
- "@jfy133"
25 changes: 25 additions & 0 deletions modules/nf-core/kaiju/mkfmi/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ nextflow_process {
[ id:'test', single_end:true ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true)
]
input[1] = false
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

test("sarscov2 - proteome - keep intermediates") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:true ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true)
]
input[1] = true
"""
}
}
Expand All @@ -42,6 +66,7 @@ nextflow_process {
[ id:'test', single_end:true ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true)
]
input[1] = true
"""
}
}
Expand Down
93 changes: 92 additions & 1 deletion modules/nf-core/kaiju/mkfmi/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
"content": [
"test.fmi"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
},
"timestamp": "2024-01-20T16:27:00.670884904"
},
"sarscov2 - proteome - fasta": {
Expand All @@ -18,7 +22,16 @@
]
],
"1": [

],
"2": [

],
"3": [
"versions.yml:md5,3cbd427d0187ffee188347830d33dc12"
],
"bwt": [

],
"fmi": [
[
Expand All @@ -29,11 +42,89 @@
"test.fmi:md5,54fd89f5e4eab61af30175e8aa389598"
]
],
"sa": [

],
"versions": [
"versions.yml:md5,3cbd427d0187ffee188347830d33dc12"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
},
"timestamp": "2025-01-16T09:29:43.118144301"
},
"sarscov2 - proteome - keep intermediates": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": true
},
"test.fmi:md5,54fd89f5e4eab61af30175e8aa389598"
]
],
"1": [
[
{
"id": "test",
"single_end": true
},
"test.bwt:md5,fe639339371f5b4352017522f54d0c99"
]
],
"2": [
[
{
"id": "test",
"single_end": true
},
"test.sa:md5,7d4a6481ad5ffdc2058bb35331a64d4d"
]
],
"3": [
"versions.yml:md5,3cbd427d0187ffee188347830d33dc12"
],
"bwt": [
[
{
"id": "test",
"single_end": true
},
"test.bwt:md5,fe639339371f5b4352017522f54d0c99"
]
],
"fmi": [
[
{
"id": "test",
"single_end": true
},
"test.fmi:md5,54fd89f5e4eab61af30175e8aa389598"
]
],
"sa": [
[
{
"id": "test",
"single_end": true
},
"test.sa:md5,7d4a6481ad5ffdc2058bb35331a64d4d"
]
],
"versions": [
"versions.yml:md5,3cbd427d0187ffee188347830d33dc12"
]
}
],
"timestamp": "2024-01-20T16:26:48.062489887"
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.2"
},
"timestamp": "2025-01-16T09:29:47.890853152"
}
}
42 changes: 33 additions & 9 deletions modules/nf-core/krakenuniq/build/main.nf
Original file line number Diff line number Diff line change
@@ -1,34 +1,58 @@
process KRAKENUNIQ_BUILD {
tag "$meta.id"
tag "${meta.id}"
label 'process_medium'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.4--pl5321h6dccd9a_2':
'biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_2' }"
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.4--pl5321h6dccd9a_2'
: 'biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_2'}"

input:
tuple val(meta), path(custom_library_dir, stageAs: "library/*"), path(custom_taxonomy_dir, stageAs: "taxonomy"), path(custom_seqid2taxid)
val keep_intermediate

output:
tuple val(meta), path("$prefix/"), emit: db
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}/"), emit: db
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
custom_db = custom_library_dir ? "mkdir $prefix && mv library taxonomy $custom_seqid2taxid $prefix" : ""
custom_db = custom_library_dir ? "mkdir ${prefix} && mv library taxonomy ${custom_seqid2taxid} ${prefix}" : ""
run_cleanup = keep_intermediate ? "" : "find -L ${prefix} -type f -not -name \"*.kdb\" -type f -not -name \"*idx\" -not -name \"taxDB\" -delete"

"""
$custom_db
${custom_db}

krakenuniq-build \\
$args \\
${args} \\
--threads ${task.cpus} \\
--db ${prefix}

${run_cleanup}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
END_VERSIONS
"""

stub:
prefix = task.ext.prefix ?: "${meta.id}"
run_cleanup = keep_intermediate ? "" : "find -L ${prefix} -type f -not -name \"*.kdb\" -type f -not -name \"*idx\" -not -name \"taxDB\" -delete"
"""
mkdir ${prefix}/
touch ${prefix}/database-build.log
touch ${prefix}/database.idx
touch ${prefix}/database.jdb
touch ${prefix}/database.kdb
touch ${prefix}/database.kdb.counts
touch ${prefix}/database.kraken.tsv
touch ${prefix}/database.report.tsv

cat <<-END_VERSIONS > versions.yml
"${task.process}":
krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
Expand Down
7 changes: 6 additions & 1 deletion modules/nf-core/krakenuniq/build/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,19 @@ input:
- custom_seqid2taxid:
type: file
description: custom seqid2taxid
- - keep_intermediate:
type: boolean
description: "Keep intermediate files"
pattern: true|false

output:
- db:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'test', single_end:false ]`
- $prefix/:
- ${prefix}/:
type: directory
description: Directory containing KrakenUniq database
pattern: "*/"
Expand Down
Loading
Loading