-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
111 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
// Using DSL-2 | ||
nextflow.enable.dsl=2 | ||
|
||
// Import helpers | ||
GroovyShell shell = new GroovyShell() | ||
def helpers = shell.parse(new File("${workflow.projectDir}/helpers.gvy")) | ||
|
||
// Import sub-workflows | ||
include { bin_metagenomes } from './modules/processes/bin_metagenomes' | ||
include { align_reads } from './modules/align_reads' | ||
include { find_reads } from './modules/find_reads' | ||
|
||
// Standalone entrypoint | ||
workflow { | ||
|
||
// Show help message if the user specifies the --help flag at runtime | ||
helpers.help_message( | ||
""" | ||
Align a set of metagenomic sequencing data to a binned pangenome in | ||
which the genes have been grouped by similar occurrence across genomes | ||
(using the build_pangenome.nf workflow). | ||
Those binned gene groups will be used to summarize a batch of metagenomic | ||
data, and the similarity of gene abundance across metagenomes will be | ||
tested for concordence with the similarity across genomes. | ||
By applying the results of gene binning to the metagenome alignment | ||
results using the same gene catalog, the diversity of a collection of | ||
organisms can be collapsed into units of those co-occurrent genetic | ||
elements. | ||
Parameters: | ||
--genes Path of single deduplicated amino acid FASTA file to be used for alignment | ||
--reads Folder containing the set of FASTQ files to align against those genes | ||
--reads_suffix File ending for all reads (default: ${params.reads_suffix}) | ||
--paired Set to true if input folder contains paired-end FASTQ files (default: false) | ||
--samplesheet Optional file listing the FASTQ inputs to process (headers: sample,R1,R2) | ||
--min_score_reads Minimum alignment score (default: ${params.min_score_reads}) | ||
--min_identity Minimum percent identity of the amino acid alignment required to retain the alignment | ||
(default: ${params.min_identity}, ranges 0-100) | ||
--max_evalue Maximum E-value threshold used to filter all alignments | ||
(default: ${params.max_evalue}) | ||
--aligner Algorithm used for alignment (default: ${params.aligner}, options: diamond, blast) | ||
--query_gencode Genetic code used for conceptual translation of genome sequences | ||
(default: ${params.query_gencode}) | ||
--max_overlap Any alignment which overlaps a higher-scoring alignment by more than this | ||
amount will be filtered out (default: ${params.max_overlap}, range: 0-100) | ||
--aln_fmt Column headings used for alignment outputs (see DIAMOND documentation for details) | ||
(default: ${params.aln_fmt}) | ||
--gene_bins Grouping of genes into bins (e.g. gene_bins.csv) | ||
--group_profile Gene content of genome groups (e.g. group_profile.csv) | ||
--output Folder where output files will be written | ||
--metadata Optional: Metadata table used to compare samples (CSV) | ||
--category Optional: Column from metadata table used for comparison | ||
--min_n_reads Exclude any samples with fewer than this number of reads aligned (default: 0) | ||
--min_n_genes Exclude any samples with fewer than this number of genes detected (default: 0) | ||
""", | ||
params.help | ||
) | ||
|
||
// Make sure that the required parameters were provided | ||
helpers.require_param(params.genes, "genes") | ||
helpers.require_param(params.gene_bins, "gene_bins") | ||
helpers.require_param(params.group_profile, "group_profile") | ||
helpers.require_param(params.output, "output") | ||
|
||
// If there is no samplesheet | ||
if ( "${params.samplesheet}" != "false" ){ | ||
// Requre the reads parameter | ||
helpers.require_param(params.reads, "reads") | ||
} | ||
|
||
// Get the reads either from samplesheet or the input folder | ||
find_reads() | ||
|
||
// Align those reads against the centroids | ||
align_reads( | ||
file("${params.genes}"), | ||
find_reads.out | ||
) | ||
|
||
// Get the gene bins | ||
gene_bins = file(params.gene_bins, checkIfExists: true) | ||
|
||
// Get the genome groups | ||
genome_groups = file(params.genome_groups, checkIfExists: true) | ||
|
||
// Get the genome groups | ||
group_profile = file(params.group_profile, checkIfExists: true) | ||
|
||
// Get the metadata table | ||
metadata = file(params.metadata, checkIfExists: true) | ||
|
||
// Bin the metagenomes | ||
bin_metagenomes( | ||
align_reads.out.csv, | ||
gene_bins, | ||
genome_groups, | ||
group_profile, | ||
metadata | ||
) | ||
|
||
} |