Skip to content

Commit

Permalink
add meta.yml for each local module
Browse files Browse the repository at this point in the history
  • Loading branch information
LilyAnderssonLee committed Jan 9, 2025
1 parent d259ea4 commit d37ee4b
Show file tree
Hide file tree
Showing 12 changed files with 375 additions and 14 deletions.
34 changes: 34 additions & 0 deletions modules/local/extract_viral_taxid/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Writes the taxids that are both labelled "virus" in the taxpasta standardised
// profile and present in the classifier report of the same sample.
//
// Inputs:
//   evalue                         - e-value cut-off, only used for DIAMOND reports
//   taxpasta_standardised_profile  - profile whose first column is the taxid
//   report                         - Kraken2/Centrifuge report or DIAMOND TSV
// Output:
//   viral_taxid - "<prefix>_viral_taxids.tsv"; optional, because nothing is
//                 written when the profile contains no "virus" line or when
//                 meta.tool is not one of kraken2/centrifuge/diamond.
//
// NOTE(review): both input tuples bind the name `meta`; presumably they carry
// the same map for a given sample - confirm at the call site.
process EXTRACT_VIRAL_TAXID {

    tag "$meta.id"
    label 'process_low'

    input:
    val evalue // e-value threshold to filter the diamond report
    tuple val(meta), path(taxpasta_standardised_profile)
    tuple val(meta), path(report) // classification report

    output:
    tuple val(meta), path("*viral_taxids.tsv"), optional:true, emit: viral_taxid

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}_${meta.tool}"

    """
    if grep -qi "virus" $taxpasta_standardised_profile; then
        # Column 1 of every profile line mentioning "virus" is the candidate taxid list
        grep -i "virus" $taxpasta_standardised_profile | cut -f 1 > taxpasta_viral_taxid.txt
        if [[ "${meta.tool}" == "kraken2" || "${meta.tool}" == "centrifuge" ]]; then
            # Keep the taxid (column 5) of report lines whose column 3 is non-zero
            awk -F'\t' '\$3 != 0 {print \$5}' ${report} > detected_taxid.txt
            # grep exits 1 when nothing matches; that is a valid "no viral hit"
            # result and must not abort the task. Real grep errors (>1) still fail.
            grep -F -w -f taxpasta_viral_taxid.txt detected_taxid.txt > ${prefix}_viral_taxids.tsv || [[ \$? -eq 1 ]]
        elif [[ "${meta.tool}" == "diamond" ]]; then
            # Keep hits below the e-value cut-off and de-duplicate the taxids
            # (column 2) in first-seen order; plain uniq would only collapse
            # adjacent duplicates.
            awk '\$3 < ${evalue}' ${report} | cut -f 2 | awk '!seen[\$0]++' > detected_taxid.txt
            grep -F -w -f taxpasta_viral_taxid.txt detected_taxid.txt > ${prefix}_viral_taxids.tsv || [[ \$? -eq 1 ]]
        fi
    else
        echo "No viral taxids found." > "no_viral_taxid.txt"
    fi
    """
}
38 changes: 38 additions & 0 deletions modules/local/extract_viral_taxid/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Module description for EXTRACT_VIRAL_TAXID.
# Entries are grouped per channel, in the order declared in main.nf:
# a lone value first, then the two (meta, file) tuples.
name: extract_viral_taxid
description: Extract the taxid of viruses identified by the classifier (Kraken2/Centrifuge/DIAMOND)
keywords:
  - taxid
  - virus
  - taxpasta
  - kraken2
  - centrifuge
  - diamond
input:
  - evalue:
      type: ["number", "integer"]
      description: An e-value threshold to filter the DIAMOND classification result
  - - meta:
        type: map
        description: |
          Groovy map containing sample information
          e.g. [ id:'test', single_end:false ]
    - taxpasta_standardised_profile:
        type: file
        description: Path to the taxpasta standardised profile
  - - meta:
        type: map
        description: |
          Groovy map containing sample information
          e.g. [ id:'test', single_end:false ]
    - report:
        type: file
        description: |
          Path to the classifier report (Kraken2/Centrifuge) or TSV file (DIAMOND)
          containing the identified viral taxid.
output:
  - - meta:
        type: map
        description: |
          Groovy map containing sample information
          e.g. [ id:'test', single_end:false ]
    - viral_taxid:
        type: file
        description: File listing the extracted viral taxids
        pattern: "*viral_taxids.tsv"
authors:
  - "@LilyAnderssonLee"
42 changes: 42 additions & 0 deletions modules/local/extractcentrifugereads/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Extracts the reads that Centrifuge assigned to a given taxid.
//
// Inputs:
//   taxid    - taxonomic ID whose reads are extracted
//   results  - Centrifuge per-read classification table
//   fastq    - reads to extract from (one file if meta.single_end, else two)
// Outputs:
//   extracted_centrifuge_reads - "<prefix>_<taxid>.extracted_centrifuge_read*.fastq";
//                                optional, because nothing is written when
//                                meta.single_end is neither true nor false
//   versions                   - versions.yml with the seqkit version
//
// NOTE(review): both input tuples bind the name `meta`; presumably they carry
// the same map for a given sample - confirm at the call site.
process EXTRACTCENTRIFUGEREADS {

    tag "$meta.id"
    label 'process_low'

    conda "bioconda::seqkit=2.8.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.8.2--h9ee0642_1':
        'biocontainers/seqkit:2.8.2--h9ee0642_1' }"

    input:
    val taxid
    tuple val(meta), path(results)
    tuple val(meta), path(fastq) // bowtie2/align *unmapped_{1,2}.fastq.gz

    output:
    tuple val(meta), path("*.fastq"), optional:true, emit: extracted_centrifuge_reads
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Read IDs (column 1) whose column 3 equals the taxid and whose column 8 is 1
    awk -v taxID=$taxid '\$3 == taxID && \$8 == 1 {print \$1}' $results > readID.txt
    if [ "${meta.single_end}" == 'true' ]; then
        seqkit grep -f readID.txt $fastq > ${prefix}_${taxid}.extracted_centrifuge_read.fastq
    elif [ "${meta.single_end}" == 'false' ]; then
        seqkit grep -f readID.txt ${fastq[0]} > ${prefix}_${taxid}.extracted_centrifuge_read1.fastq
        seqkit grep -f readID.txt ${fastq[1]} > ${prefix}_${taxid}.extracted_centrifuge_read2.fastq
    fi
    rm readID.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """
}
41 changes: 41 additions & 0 deletions modules/local/extractcentrifugereads/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Module description for EXTRACTCENTRIFUGEREADS.
# Entries are grouped per channel, in the order declared in main.nf:
# a lone value first, then the two (meta, file) tuples.
name: extractcentrifugereads
description: Use awk and SeqKit to extract reads with a specified taxonomic ID from the Centrifuge classification output
keywords:
  - taxid
  - centrifuge
  - results
  - fastq
  - extract_reads
input:
  - taxid:
      type: integer
      description: A taxonomic ID to extract the reads
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - results:
        type: file
        description: File containing classification results
        pattern: "*.results.txt"
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - fastq:
        type: file
        description: FastQ files
        pattern: "*.fastq.gz"
output:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - extracted_centrifuge_reads:
        type: file
        description: FastQ files containing the extracted reads of a specified taxid
        pattern: "*.fastq"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@LilyAnderssonLee"
43 changes: 43 additions & 0 deletions modules/local/extractdiamondreads/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Extracts the reads that DIAMOND assigned to a given taxid.
//
// Inputs:
//   taxid  - taxonomic ID whose reads are extracted
//   tsv    - DIAMOND classification table (read ID in column 1, taxid in column 2)
//   fastq  - reads to extract from (one file if meta.single_end, else two)
// Outputs:
//   extracted_diamond_reads - "<prefix>_<taxid>.extracted_diamond_read*.fastq";
//                             optional, because nothing is written when
//                             meta.single_end is neither true nor false
//   versions                - versions.yml with the seqkit version
//
// NOTE(review): the process name carries an extra "C" (EXTRACTC...); it is kept
// unchanged here because callers reference it by name.
// NOTE(review): both input tuples bind the name `meta`; presumably they carry
// the same map for a given sample - confirm at the call site.
process EXTRACTCDIAMONDREADS {

    tag "$meta.id"
    label 'process_high'

    conda "bioconda::seqkit=2.8.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.8.2--h9ee0642_1':
        'biocontainers/seqkit:2.8.2--h9ee0642_1' }"

    input:
    val taxid
    tuple val(meta), path(tsv)
    tuple val(meta), path(fastq) // bowtie2/align *unmapped_{1,2}.fastq.gz

    output:
    tuple val(meta), path("*.fastq"), optional:true, emit: extracted_diamond_reads
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    # Read IDs (column 1) of the hits whose column 2 equals the taxid
    awk -v taxID=$taxid '\$2 == taxID {print \$1}' $tsv > readID.txt
    # The interpolated value is quoted so the test stays well-formed for any value
    if [ "${meta.single_end}" == 'true' ]; then
        seqkit grep -f readID.txt $fastq > ${prefix}_${taxid}.extracted_diamond_read.fastq
    elif [ "${meta.single_end}" == 'false' ]; then
        seqkit grep -f readID.txt ${fastq[0]} > ${prefix}_${taxid}.extracted_diamond_read1.fastq
        seqkit grep -f readID.txt ${fastq[1]} > ${prefix}_${taxid}.extracted_diamond_read2.fastq
    fi
    rm readID.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """
}
36 changes: 36 additions & 0 deletions modules/local/extractdiamondreads/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Module description for the DIAMOND read-extraction process.
name: extractdiamondreads
description: Use awk and SeqKit to extract reads with a specified taxonomic ID from the DIAMOND classification output
keywords:
  - taxid
  - DIAMOND/blastx
  - tsv
  - fastq
  - extract_reads
input:
  - taxid:
      type: integer
      description: A taxonomic ID to extract the reads
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - tsv:
      type: file
      description: Tab separated file containing DIAMOND taxonomic classification of hits
      pattern: "*.tsv"
  - fastq:
      type: file
      description: FastQ files
      pattern: "*.fastq.gz"
output:
  - extracted_diamond_reads:
      type: file
      description: FastQ files containing the extracted reads of a specified taxid
      pattern: "*.fastq"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@LilyAnderssonLee"
27 changes: 27 additions & 0 deletions modules/local/rm_empty_fastq/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Deletes zero-byte "*.fastq" files directly inside the given folder.
//
// Input:
//   folder - directory holding extracted reads
// Output:
//   folder - the same path, after the empty files were removed (optional)
//
// The files are removed in place, i.e. the staged input directory itself is
// modified.
process RM_EMPTY_FASTQ {

    label 'process_low'

    input:
    path folder

    output:
    path folder, optional: true

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    if [ -d ${folder} ]; then
        for f in ${folder}/*.fastq; do
            # With no match the glob stays as the literal pattern; skip it
            # instead of letting rm fail on a non-existent path. Dangling
            # symlinks are still handled by the size check below.
            [ -e "\$f" ] || [ -L "\$f" ] || continue
            if [ ! -s "\$f" ]; then
                rm "\$f"
            fi
        done
    else
        echo "Folder ${folder} doesn't exist."
    fi
    """
}
17 changes: 17 additions & 0 deletions modules/local/rm_empty_fastq/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Module description for RM_EMPTY_FASTQ.
name: rm_empty_fastq
description: |
  Remove empty FastQ files extracted for a user-defined taxid.
  An empty file means that this taxid was not detected by the classifier (Kraken2/Centrifuge/DIAMOND).
keywords:
  - empty
  - fastq
input:
  - folder:
      type: path
      description: Path to the folder storing the extracted reads from Kraken2/Centrifuge/DIAMOND
output:
  - folder:
      type: path
      description: Path to the folder storing the non-empty extracted reads from Kraken2/Centrifuge/DIAMOND
authors:
  - "@LilyAnderssonLee"
34 changes: 34 additions & 0 deletions modules/local/subset_bam/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Subsets an indexed BAM file to the given reference accessions.
//
// Inputs:
//   meta, bam, bai   - sample map, BAM file and its index
//   taxid_accession  - list of accessions; they are joined with spaces and
//                      passed to `samtools view` as regions
// Outputs:
//   bam       - "<prefix>.bam" holding the selected alignments
//   versions  - versions.yml with the samtools version
//
// NOTE(review): if the staged input is itself named "<prefix>.bam", samtools
// would read from and write to the same path - confirm the upstream naming or
// set task.ext.prefix.
process SUBSET_BAM {

    tag "$meta.id"
    label 'process_low'

    // Conda package specs pin the version with "=", not ":"
    conda "bioconda::samtools=1.21"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0':
        'biocontainers/samtools:1.21--h50ea8bc_0' }"

    input:
    tuple val(meta), path(bam), path(bai)
    val taxid_accession

    output:
    tuple val(meta), path("*.bam"), emit: bam
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def accessions = taxid_accession.join(" ")

    """
    samtools view $bam $accessions -o ${prefix}.bam
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(samtools --version |& sed '1!d ; s/samtools //')
    END_VERSIONS
    """
}
53 changes: 53 additions & 0 deletions modules/local/subset_bam/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Module description for SUBSET_BAM.
# Entries are grouped per channel, in the order declared in main.nf:
# the (meta, bam, bai) tuple first, then the lone accession list.
name: subset_bam
description: |
  Subset bam file for each taxid
keywords:
  - samtools
  - view
  - subset
  - bam
tools:
  - samtools:
      description: |
        SAMtools is a set of utilities for interacting with and post-processing
        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
        These files are generated as output by short read aligners like BWA.
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      doi: 10.1093/bioinformatics/btp352
      licence: ["MIT"]
      identifier: biotools:samtools
input:
  - - meta:
        type: map
        description: |
          Groovy map containing sample information
          e.g. [ id:'test', single_end:false ]
    - bam:
        type: file
        description: BAM file of reads aligned to the pathogen genomes
        pattern: "*.{bam}"
    - bai:
        type: file
        description: BAI file (BAM index) of BAM reads aligned to the pathogen genome
        pattern: "*.{bai}"
  - taxid_accession:
      type: list
      description: A list of accessions of a taxid
output:
  - - meta:
        type: map
        description: |
          Groovy map containing sample information
          e.g. [ id:'test', single_end:false ]
    - bam:
        type: file
        description: A subset bam file
        pattern: "*.{bam}"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@LilyAnderssonLee"
Loading

0 comments on commit d37ee4b

Please sign in to comment.