Skip to content

Commit

Permalink
new module: Picard positionbaseddownsamplesam (#3253)
Browse files Browse the repository at this point in the history
* correct urls for MethylDackel

* adds positionbaseddownsamplesam

* adds tests for positionbaseddownsamplesam module

* removes target_num_read from input params, renaming based on actual results instead

* Apply suggestions from code review

Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com>

* Swap to nf-test

---------

Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com>
  • Loading branch information
3 people authored Aug 22, 2024
1 parent 13c00e4 commit eb093e6
Show file tree
Hide file tree
Showing 6 changed files with 278 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Conda environment used by the picard/positionbaseddownsamplesam module.
name: picard_positionbaseddownsamplesam

# Channels to resolve packages from, in the order given.
channels:
  - conda-forge
  - bioconda
  - defaults

# Picard is pinned to an exact release.
dependencies:
  - bioconda::picard=3.1.1
62 changes: 62 additions & 0 deletions modules/nf-core/picard/positionbaseddownsamplesam/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Downsample an alignment file with Picard PositionBasedDownsampleSam.
//
// Input : tuple of [ meta map, alignment file, fraction of reads to keep ].
// Output: the downsampled BAM and its index, both renamed so that the number of
//         reads Picard reports as kept is part of the file name
//         (<prefix>.ds<N>.bam / <prefix>.ds<N>.bai), that number as the
//         `num_reads` env value, and a versions.yml file.
process PICARD_POSITIONBASEDDOWNSAMPLESAM {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' :
        'biocontainers/picard:3.1.1--hdfd78af_0' }"

    input:
    tuple val(meta), path(bam), val(fraction)

    output:
    tuple val(meta), path("*.ds*.bam")     , emit: bam
    tuple val(meta), path("*.ds*.bai")     , emit: bai, optional:true
    tuple val(meta), env(ACTUAL_NUM_READS) , emit: num_reads, optional:true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // JVM heap size in megabytes. Only 80% of the task allocation is handed to the
    // heap so the JVM's non-heap memory still fits inside the task limit; working in
    // megabytes also avoids a sub-1GB allocation being truncated to "-Xmx0g".
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[Picard PositionBasedDownsampleSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega*0.8).intValue()
    }
    """
    # Picard's log goes to tool_stderr because the kept-read count is parsed from it.
    # If Picard fails, replay the log on stderr so the error is visible in the task log.
    picard \\
        -Xmx${avail_mem}M \\
        PositionBasedDownsampleSam \\
        $args \\
        --CREATE_INDEX \\
        --INPUT $bam \\
        --OUTPUT ${prefix}.ds.bam \\
        --FRACTION ${fraction} 2> tool_stderr \\
        || { status=\$?; cat tool_stderr >&2; exit \$status; }

    # Extract N from the last "Kept N out of ..." line of the Picard log.
    ACTUAL_NUM_READS=\$(sed -nE 's/.*Kept ([0-9]+) out of.*/\\1/p' tool_stderr | tail -n 1)
    if [ -z "\$ACTUAL_NUM_READS" ]; then
        echo "ERROR: could not parse the number of kept reads from the Picard log (tool_stderr)" >&2
        cat tool_stderr >&2
        exit 1
    fi

    mv "${prefix}.ds.bam" "${prefix}.ds\${ACTUAL_NUM_READS}.bam"
    mv "${prefix}.ds.bai" "${prefix}.ds\${ACTUAL_NUM_READS}.bai"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(echo \$(picard PositionBasedDownsampleSam --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // NOTE(review): the stub names its index "<prefix>.ds10.bam.bai" whereas the real
    // script produces "<prefix>.ds<N>.bai", and it never sets ACTUAL_NUM_READS. Both
    // names match the "*.ds*.bai" output glob. Left unchanged here because the
    // committed test snapshot pins these stub outputs.
    """
    touch ${prefix}.ds10.bam
    touch ${prefix}.ds10.bam.bai

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(echo \$(picard PositionBasedDownsampleSam --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """
}
51 changes: 51 additions & 0 deletions modules/nf-core/picard/positionbaseddownsamplesam/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
# Module metadata for picard/positionbaseddownsamplesam.
name: picard_positionbaseddownsamplesam
description: >-
  Samples a SAM/BAM/CRAM file using flowcell position information for the best
  approximation of having sequenced fewer reads
keywords:
  - sample
  - bam
  - sam
  - cram
tools:
  - picard:
      description: |
        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
        data and formats such as SAM/BAM/CRAM and VCF.
      homepage: https://broadinstitute.github.io/picard/
      documentation: https://broadinstitute.github.io/picard/
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: Input BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - fraction:
      type: float
      description: Fraction of reads to keep, passed to Picard as --FRACTION (e.g. 0.5)
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: A downsampled BAM file, named <prefix>.ds<num_reads>.bam
      pattern: "*.{bam}"
  - bai:
      type: file
      # The module always passes --CREATE_INDEX to Picard, so no extra flag is needed.
      description: BAM index file for the downsampled BAM, named <prefix>.ds<num_reads>.bai
      pattern: "*.{bai}"
  - num_reads:
      type: integer
      description: The actual number of reads kept after downsampling, as reported by Picard
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@bwlang"
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// nf-test specification for the PICARD_POSITIONBASEDDOWNSAMPLESAM process.
nextflow_process {

    name "Test Process PICARD_POSITIONBASEDDOWNSAMPLESAM"
    script "../main.nf"
    process "PICARD_POSITIONBASEDDOWNSAMPLESAM"

    tag "modules"
    tag "modules_nfcore"
    tag "picard"
    tag "picard/positionbaseddownsamplesam"

    // Real run: keep a 0.9 fraction of the sarscov2 paired-end sorted BAM.
    test("sarscov2 - bam") {

        when {
            process {
                """
                input[0] = [
                [ id:'test'], // meta map
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                0.9
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // The BAM is snapshotted via header and read MD5s rather than the
                // file checksum; the index and versions file are snapshotted directly.
                {
                    assert snapshot(
                        process.out.versions,
                        process.out.bam.collect { bam(it[1]).getHeaderMD5() },
                        process.out.bam.collect { bam(it[1]).getReadsMD5() },
                        process.out.bai
                    ).match()
                }
            )
        }

    }

    // Stub run: only the stub block of the process is executed.
    test("sarscov2 - bam - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                [ id:'test'], // meta map
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                0.5
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // All output channels are snapshotted as a whole.
                { assert snapshot(process.out).match() }
            )
        }

    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"sarscov2 - bam - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.ds10.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
[
{
"id": "test"
},
"test.ds10.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
[
{
"id": "test"
},
""
]
],
"3": [
"versions.yml:md5,368e5d7049e6f6fcd8497673f6b0ba60"
],
"bai": [
[
{
"id": "test"
},
"test.ds10.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"bam": [
[
{
"id": "test"
},
"test.ds10.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"num_reads": [
[
{
"id": "test"
},
""
]
],
"versions": [
"versions.yml:md5,368e5d7049e6f6fcd8497673f6b0ba60"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-27T05:11:00.828032096"
},
"sarscov2 - bam": {
"content": [
[
"versions.yml:md5,368e5d7049e6f6fcd8497673f6b0ba60"
],
[
"4a774906f03f07ec9d5c36902047afb7"
],
[
"894549ee3ced6b5ca2eed2563a985217"
],
[
[
{
"id": "test"
},
"test.ds200.bai:md5,ea4e440cbb1d4e5be16dbe76981e8d44"
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-06-27T05:24:58.391114796"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
---
# Test tag for this module mapped to the path glob covering its files
# (presumably used by CI to decide when the module's tests run - confirm).
picard/positionbaseddownsamplesam:
  - "modules/nf-core/picard/positionbaseddownsamplesam/**"

0 comments on commit eb093e6

Please sign in to comment.