-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d259ea4
commit d37ee4b
Showing
12 changed files
with
375 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/*
 * EXTRACT_VIRAL_TAXID
 *
 * Intersects the taxids that the taxpasta standardised profile labels as
 * "virus" (case-insensitive match, first column) with the taxids found in
 * the classifier report, and writes the result to <prefix>_viral_taxids.tsv.
 *
 * Inputs:
 *   evalue                        - threshold; for DIAMOND, rows are kept when column 3 is below it
 *   taxpasta_standardised_profile - taxpasta standardised profile
 *   report                        - classifier report; meta.tool selects how it is parsed
 *                                   (kraken2/centrifuge: column 5 where column 3 != 0,
 *                                    diamond: column 2 where column 3 < evalue)
 *
 * Output:
 *   viral_taxid - <prefix>_viral_taxids.tsv (optional). The file may be empty when
 *                 none of the viral taxids occur in the report. Nothing is emitted
 *                 when the profile contains no "virus" line or meta.tool is not one
 *                 of kraken2/centrifuge/diamond.
 *
 * NOTE(review): both input tuples bind the name `meta`; confirm with the caller
 * that the two channels always carry the same meta map.
 */
process EXTRACT_VIRAL_TAXID {

    tag "$meta.id"
    label 'process_low'

    input:
    val evalue // e-value threshold to filter the diamond report
    tuple val(meta), path(taxpasta_standardised_profile)
    tuple val(meta), path(report) // classification report

    output:
    tuple val(meta), path("*viral_taxids.tsv"), optional:true, emit: viral_taxid

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}_${meta.tool}"

    """
    if grep -qi "virus" $taxpasta_standardised_profile; then
        grep -i "virus" $taxpasta_standardised_profile | cut -f 1 > taxpasta_viral_taxid.txt
        if [[ "${meta.tool}" == "kraken2" || "${meta.tool}" == "centrifuge" ]]; then
            awk -F'\t' '\$3 != 0 {print \$5}' ${report} > detected_taxid.txt
            # grep exits 1 when nothing matches; accept that (and only that) so an
            # empty intersection does not abort the task under errexit.
            grep -F -w -f taxpasta_viral_taxid.txt detected_taxid.txt > ${prefix}_viral_taxids.tsv || [ \$? -eq 1 ]
        elif [[ "${meta.tool}" == "diamond" ]]; then
            awk '\$3 < ${evalue}' ${report} | cut -f 2 | uniq > detected_taxid.txt
            # Same tolerance for "no match", applied inside the pipeline so it also
            # holds when pipefail is enabled.
            { grep -F -w -f taxpasta_viral_taxid.txt detected_taxid.txt || [ \$? -eq 1 ]; } | uniq > ${prefix}_viral_taxids.tsv
        fi
    else
        echo "No viral taxids found." > "no_viral_taxid.txt"
    fi
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
name: extract_viral_taxid | ||
description: Extract the taxid of viruses identified by the classifier (Kraken2/Centrifuge/DIAMOND) | ||
keywords: | ||
- taxid | ||
- virus | ||
- taxpasta | ||
- kraken2 | ||
- centrifuge | ||
- diamond | ||
input: | ||
- evalue: | ||
type: ["number", "integer"] | ||
description: An e-value threshold to filter the diamond classification result | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- - taxpasta_standardised_profile: | ||
type: file | ||
description: Path to the taxpasta standardised profile | ||
- - report: | ||
type: file | ||
description: | | ||
Path to the classifier report (Kraken2/Centrifuge) or TSV file (DIAMOND) | ||
containing the identified viral taxid. | ||
output: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- - viral_taxid: | ||
type: file | ||
description: Extract viral taxid | ||
pattern: "*viral_taxids.tsv" | ||
authors: | ||
- "@LilyAnderssonLee" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/*
 * EXTRACTCENTRIFUGEREADS
 *
 * Extracts the reads that the Centrifuge results file assigns to `taxid`.
 * Read IDs are taken from column 1 of rows where column 3 equals the taxid
 * and column 8 equals 1, then seqkit grep pulls those reads from the FASTQ
 * file(s).
 *
 * Inputs:
 *   taxid   - taxonomic ID to extract
 *   results - Centrifuge classification results
 *   fastq   - FASTQ file (single-end) or pair of files (paired-end)
 *
 * Outputs:
 *   extracted_centrifuge_reads - <prefix>_<taxid>.extracted_centrifuge_read{,1,2}.fastq (optional)
 *   versions                   - versions.yml with the seqkit version
 *
 * No FASTQ is written when meta.single_end renders as neither 'true' nor 'false'.
 */
process EXTRACTCENTRIFUGEREADS {

    tag "$meta.id"
    label 'process_low'

    conda "bioconda::seqkit=2.8.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.8.2--h9ee0642_1':
        'biocontainers/seqkit:2.8.2--h9ee0642_1' }"

    input:
    val taxid
    tuple val (meta), path(results)
    tuple val (meta), path(fastq) // bowtie2/align *unmapped_{1,2}.fastq.gz

    output:
    tuple val(meta), path("*.fastq"), optional:true, emit: extracted_centrifuge_reads
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Collect the IDs of reads assigned to the requested taxid.
    awk -v taxID=$taxid '\$3 == taxID && \$8 == 1 {print \$1}' $results > readID.txt
    if [ "${meta.single_end}" == 'true' ]; then
        seqkit grep -f readID.txt $fastq > ${prefix}_${taxid}.extracted_centrifuge_read.fastq
    elif [ "${meta.single_end}" == 'false' ]; then
        seqkit grep -f readID.txt ${fastq[0]} > ${prefix}_${taxid}.extracted_centrifuge_read1.fastq
        seqkit grep -f readID.txt ${fastq[1]} > ${prefix}_${taxid}.extracted_centrifuge_read2.fastq
    fi
    rm readID.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
name: extractcentrifuge | ||
description: Use awk and seqkit to extract reads with a specified taxonomic ID from the Centrifuge classification output | ||
keywords: | ||
- taxid | ||
- centrifuge | ||
- results | ||
- fastq | ||
- extract_reads | ||
input: | ||
- taxid: | ||
type: integer | ||
description: A taxonomic ID to extract the reads | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- - results: | ||
type: file | ||
description: File containing classification results | ||
pattern: "*.{results.txt}" | ||
- - fastq: | ||
type: file | ||
description: FastQ files | ||
pattern: "*.fastq.gz" | ||
output: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- - extracted_centrifuge_reads: | ||
type: file | ||
description: FastQ files contain the extracted reads of a specified taxid | ||
pattern: "*fastq" | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@LilyAnderssonLee" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/*
 * EXTRACTCDIAMONDREADS
 *
 * Extracts the reads that the DIAMOND TSV assigns to `taxid`. Read IDs are
 * taken from column 1 of rows whose column 2 equals the taxid, then
 * seqkit grep pulls those reads from the FASTQ file(s).
 *
 * Inputs:
 *   taxid - taxonomic ID to extract
 *   tsv   - DIAMOND tab-separated classification output
 *   fastq - FASTQ file (single-end) or pair of files (paired-end)
 *
 * Outputs:
 *   extracted_diamond_reads - <prefix>_<taxid>.extracted_diamond_read{,1,2}.fastq (optional)
 *   versions                - versions.yml with the seqkit version
 *
 * No FASTQ is written when meta.single_end renders as neither 'true' nor 'false'.
 */
process EXTRACTCDIAMONDREADS {

    tag "$meta.id"
    label 'process_high'

    conda "bioconda::seqkit=2.8.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.8.2--h9ee0642_1':
        'biocontainers/seqkit:2.8.2--h9ee0642_1' }"

    input:
    val taxid
    tuple val (meta), path(tsv)
    tuple val (meta), path(fastq) // bowtie2/align *unmapped_{1,2}.fastq.gz

    output:
    tuple val(meta), path("*.fastq"), optional:true, emit: extracted_diamond_reads
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    # Collect the IDs of reads assigned to the requested taxid.
    awk -v taxID=$taxid '\$2 == taxID {print \$1}' $tsv > readID.txt
    # Quote the interpolated value so the test stays well-formed even if it is empty.
    if [ "${meta.single_end}" == 'true' ]; then
        seqkit grep -f readID.txt $fastq > ${prefix}_${taxid}.extracted_diamond_read.fastq
    elif [ "${meta.single_end}" == 'false' ]; then
        seqkit grep -f readID.txt ${fastq[0]} > ${prefix}_${taxid}.extracted_diamond_read1.fastq
        seqkit grep -f readID.txt ${fastq[1]} > ${prefix}_${taxid}.extracted_diamond_read2.fastq
    fi
    rm readID.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
name: extractdiamondreads | ||
description: Use awk and seqkit to extract reads with a specified taxonomic ID from the DIAMOND classification output | ||
keywords: | ||
- taxid | ||
- DIAMOND/blastx | ||
- tsv | ||
- fastq | ||
- extract_reads | ||
input: | ||
- taxid: | ||
type: integer | ||
description: A taxonomic ID to extract the reads | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- tsv: | ||
type: file | ||
description: Tab separated file containing DIAMOND taxonomic classification of hits | ||
pattern: "*.tsv" | ||
- fastq: | ||
type: file | ||
description: FastQ files | ||
pattern: "*.fastq.gz" | ||
output: | ||
- extracted_diamond_reads: | ||
type: file | ||
description: FastQ files contain the extracted reads of a specified taxid | ||
pattern: "*fastq" | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@LilyAnderssonLee" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/*
 * RM_EMPTY_FASTQ
 *
 * Deletes zero-size *.fastq files found directly inside `folder` and emits
 * the folder again (optional output).
 *
 * Input:
 *   folder - directory holding extracted FASTQ files
 *
 * Output:
 *   folder - the same path after empty FASTQ files were removed
 *
 * NOTE(review): files are removed in place inside the staged input directory;
 * confirm this is acceptable for the upstream task's work directory / -resume.
 */
process RM_EMPTY_FASTQ {

    label 'process_low'

    input:
    path folder

    output:
    path folder, optional: true

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    if [ -d ${folder} ]; then
        for f in ${folder}/*.fastq; do
            # When nothing matches, the glob is left as a literal pattern;
            # skip it instead of trying to rm a non-existent path.
            [ -e "\$f" ] || continue
            if [ ! -s "\$f" ]; then
                rm "\$f"
            fi
        done
    else
        echo "Folder ${folder} doesn't exist."
    fi
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
name: rm_empty_fastq | ||
description: | | ||
Remove empty FastQ files based on a user-defined taxid. | ||
In other words, this taxid was not detected by the classifier (Kraken2/Centrifuge/DIAMOND). | ||
keywords: | ||
- empty | ||
- fastq | ||
input: | ||
- folder: | ||
type: path | ||
description: Path stores the extracted reads from Kraken2/Centrifuge/DIAMOND | ||
output: | ||
- folder: | ||
type: path | ||
description: Path stores the non-empty extracted reads from Kraken2/Centrifuge/DIAMOND | ||
authors: | ||
- "@LilyAnderssonLee" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/*
 * SUBSET_BAM
 *
 * Writes <prefix>.bam containing only the alignments that fall on the given
 * reference accessions (passed to `samtools view` as regions, which is why
 * the BAM index is staged alongside the BAM).
 *
 * Inputs:
 *   bam, bai        - BAM file and its index
 *   taxid_accession - list of accessions; joined with spaces into region arguments
 *
 * Outputs:
 *   bam      - the subset BAM (matches "*.bam")
 *   versions - versions.yml with the samtools version
 *
 * NOTE(review): the output glob "*.bam" also matches the staged input BAM, and
 * <prefix>.bam would overwrite it if both share a name — confirm the caller
 * sets a distinct ext.prefix.
 */
process SUBSET_BAM {

    tag "$meta.id"
    label 'process_low'

    // Conda match specs use `=` for the version pin (as the seqkit modules do).
    conda "bioconda::samtools=1.21"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0':
        'biocontainers/samtools:1.21--h50ea8bc_0' }"

    input:
    tuple val(meta), path(bam), path(bai)
    val taxid_accession

    output:
    tuple val(meta), path("*.bam"), emit: bam
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def accessions = taxid_accession.join(" ")

    """
    # Options go before the positional arguments so parsing does not depend on
    # getopt argument permutation; -b requests BAM output explicitly.
    samtools view -b -o ${prefix}.bam $bam $accessions

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(samtools --version |& sed '1!d ; s/samtools //')
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
name: subset_bam | ||
description: | | ||
Subset bam file for each taxid | ||
keywords: | ||
- samtools | ||
- view | ||
- subset | ||
- bam | ||
tools: | ||
- samtools: | ||
description: | | ||
SAMtools is a set of utilities for interacting with and post-processing | ||
short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. | ||
These files are generated as output by short read aligners like BWA. | ||
homepage: http://www.htslib.org/ | ||
documentation: http://www.htslib.org/doc/samtools.html | ||
doi: 10.1093/bioinformatics/btp352 | ||
licence: ["MIT"] | ||
identifier: biotools:samtools | ||
input: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- - bam: | ||
type: file | ||
description: BAM file of reads aligned to the pathogen genomes | ||
pattern: "*.{bam}" | ||
- - bai: | ||
type: file | ||
description: BAI file (BAM index) of BAM reads aligned to the pathogen genome | ||
pattern: "*.{bai}" | ||
- taxid_accession: | ||
type: list | ||
description: A list of accessions of a taxid | ||
output: | ||
- - meta: | ||
type: map | ||
description: | | ||
Groovy map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- - bam: | ||
type: file | ||
description: A subset bam file | ||
pattern: "*.{bam}" | ||
- versions: | ||
- versions.yml: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@LilyAnderssonLee" |
Oops, something went wrong.