Commit

Update nextclade to 2.11 (#74)
sofstam authored Sep 22, 2023
1 parent 94d179e commit 0c599b9
Showing 24 changed files with 85 additions and 107 deletions.
8 changes: 4 additions & 4 deletions .github/scripts/install_singularity.sh
@@ -20,11 +20,11 @@ echo $(which go)
go version

# install Singularity
export VERSION=3.7.3
export VERSION=3.11.4
echo Install Singularity version $VERSION .. >> artifacts/test_artifact.log
wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-${VERSION}.tar.gz
tar -xzf singularity-${VERSION}.tar.gz
cd singularity
wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-ce-${VERSION}.tar.gz
tar -xzf singularity-ce-${VERSION}.tar.gz
cd singularity-ce-${VERSION}
./mconfig
make -C builddir
sudo make -C builddir install
8 changes: 4 additions & 4 deletions .github/scripts/test_nanopore_pipelines.sh
@@ -7,7 +7,7 @@ export PATH=/opt/conda/bin:$PATH
singularity --version
# write test log as github Action artifact
echo "Nextflow run current PR in --nanopolish mode (no barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--nanopolish --prefix "test_nanopore" \
--basecalled_fastq .github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/fastq_pass/ \
@@ -17,7 +17,7 @@ cp .nextflow.log artifacts/nanopolish.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --nanopolish mode (barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--nanopolish --prefix "20200311_1427_X1_FAK72834_a3787181" \
--basecalled_fastq .github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/fastq_pass/ \
@@ -27,7 +27,7 @@ cp .nextflow.log artifacts/nanopolish_barcodes.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --medaka mode (no barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--medaka \
--basecalled_fastq .github/data/nanopore/20200311_1427_X4_FAK72834_a3787181/fastq_pass/ \
@@ -36,7 +36,7 @@ cp .nextflow.log artifacts/medaka.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --medaka mode (barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--medaka \
--basecalled_fastq .github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/fastq_pass/ \
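All of the test commands above assume a `singularity` profile and now pin Nextflow 23.04.3 via `NXF_VER`. As a rough sketch only — the real profile lives in the repository's Nextflow configuration, which is not part of this diff — such a profile typically looks like this:

```nextflow
// Hypothetical sketch of a Singularity profile; the actual settings are in the repo's nextflow.config.
profiles {
    singularity {
        singularity.enabled    = true   // run each process inside a Singularity container
        singularity.autoMounts = true   // automatically bind host paths into the container
    }
}
```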
2 changes: 1 addition & 1 deletion .github/workflows/pull_request.yml
@@ -29,7 +29,7 @@ jobs:
run: |
export PATH=/opt/conda/bin:$PATH
conda install -c bioconda nextflow
NXF_VER=21.04.0 nextflow -version
NXF_VER=23.04.3 nextflow -version
- name: test nanopore pipelines
run: bash .github/scripts/test_nanopore_pipelines.sh
- name: test typing functionality
3 changes: 2 additions & 1 deletion README.md
@@ -1,4 +1,5 @@
[![Update docker for artic-ncov2019-illumina, artic-ncov2019-nanopore](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile.yml/badge.svg)](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile.yml)
[![Update docker for gms-artic-illumina, gms-artic-nanopore](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_master.yml/badge.svg)](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_master.yml) [![Update docker for gms-artic-pangolin
](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_pangolin_master.yml/badge.svg)](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_pangolin_master.yml)

![logo](workflow-image/logo.png)

2 changes: 1 addition & 1 deletion bin/get_versions.py
@@ -41,5 +41,5 @@
contents = f.read()
match = re.search(regex, contents)
if match:
# Add version number to output
# Add version number to output file
out.write("{},{}\n".format(tool, match.group(1)))
5 changes: 1 addition & 4 deletions bin/process_gvcf.py
@@ -7,6 +7,7 @@
import os
from collections import defaultdict


# from https://www.geeksforgeeks.org/python-make-a-list-of-intervals-with-sequential-numbers/
# via artic-mask
def intervals_extract(iterable):
@@ -119,7 +120,6 @@ def handle_sub(vcf_header, record):

# construct output records
for i in range(0, sub_length):

# choose base with highest frequency, skipping the reference
max_b = base_max(base_frequency[i], record.ref[i])
if max_b is None:
@@ -137,7 +137,6 @@ def handle_sub(vcf_header, record):


def main():

description = "Process a .gvcf file to create a file of consensus variants, low-frequency variants and a coverage mask"
parser = argparse.ArgumentParser(description=description)

@@ -221,7 +220,6 @@ def main():
)

for record in vcf:

is_gvcf_ref = record.alts[0] == "<*>"

# set depth for this part of the genome
@@ -261,7 +259,6 @@
# classify variants using VAF cutoffs for IUPAC ambiguity codes, etc
accept_variant = False
for out_r in out_records:

# at this point we should have resolved multi-allelic variants
assert len(out_r.alts) == 1

4 changes: 0 additions & 4 deletions bin/qc.py
@@ -80,7 +80,6 @@ def get_N_positions(fasta):


def get_pct_N_bases(fasta):

count_N = len(get_N_positions(fasta))

pct_N_bases = count_N / len(fasta.seq) * 100
@@ -104,7 +103,6 @@ def get_ref_length(ref):


def sliding_window_N_density(sequence, window=10):

sliding_window_n_density = []
for i in range(0, len(sequence.seq), 1):
window_mid = i + (window / 2)
@@ -118,7 +116,6 @@ def sliding_window_N_density(sequence, window=10):


def get_num_reads(bamfile):

st_filter = "0x900"
command = "samtools view -c -F{} {}".format(st_filter, bamfile)
what = shlex.split(command)
@@ -151,7 +148,6 @@ def go(args):
qc_pass = "FALSE"

if len(fasta.seq) != 0:

pct_N_bases = get_pct_N_bases(fasta)
largest_N_gap = get_largest_N_gap(fasta)

16 changes: 0 additions & 16 deletions bin/type_vcf.py
@@ -210,7 +210,6 @@ def csq_annotate_vcf_string(vcfString, RefIn, GffIn):


def extract_csq_info_from_vcf_string(csqVcf, minAF, minDP):

v = io.StringIO(csqVcf)

vcf_reader = vcf.Reader(v)
@@ -235,7 +234,6 @@ def extract_csq_info_from_vcf_string(csqVcf, minAF, minDP):
vcf_type = None

for record in vcf_reader:

if vcf_type == "nanopolish":
if record.FILTER:
continue
@@ -260,7 +258,6 @@ def extract_csq_info_from_vcf_string(csqVcf, minAF, minDP):
continue

if vcf_type == "medaka":

if record.FILTER:
continue

@@ -289,11 +286,9 @@


def get_variant_summary(info):

sample_vars = []

for variant in info:

aa_r = re.compile(
"(?P<refpos>[0-9]+)(?P<refaa>[A-Z\*]+)*>*(?P<varpos>[0-9]+)*(?P<varaa>[A-Z\*]+)"
)
@@ -308,9 +303,7 @@ def get_variant_summary(info):
"synonymous" not in variant["consequence"]
and "stop_retained" not in variant["consequence"]
):

if "frameshift" in variant["consequence"]:

complete_aa_variant_string = (
"Frameshift." + aa_var["refpos"] + aa_var["varaa"]
)
@@ -345,7 +338,6 @@ def read_types_yaml(inFile):


def type_vars_in_sample(types, sample_vars):

types_assigned = []

for typename, data in types.items():
@@ -357,14 +349,10 @@

# {gene, [var, var]}
for gene in data["variants"]:

# { gene: gene, aa_var: var, dna_var: var }
for sample_variant in sample_vars:

if gene in sample_variant["gene"]:

if sample_variant["aa_var"] in data["variants"][gene]:

data["variants"][gene].remove(sample_variant["aa_var"])
additional_vars_from_type.remove(sample_variant)

@@ -405,13 +393,11 @@ def type_vars_in_sample(types, sample_vars):


def write_types_to_csv(types_assigned, sampleID, csvFileOut):

fieldnames = list(types_assigned[0].keys())

fieldnames.insert(0, "sampleID")

with open(csvFileOut, "w", newline="") as csvfile:

writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

writer.writeheader()
@@ -421,7 +407,6 @@ def write_types_to_csv(types_assigned, sampleID, csvFileOut):


def read_vcf_to_vcf_string(FileIn):

if FileIn.endswith(".gz"):
with gzip.open(FileIn, "rt") as f:
vcfString = f.read()
@@ -444,7 +429,6 @@ def write_sample_vars_to_csv(summaryCsvOut, sampleID, sampleVars):
fieldnames.insert(0, "sampleID")

with open(summaryCsvOut, "w", newline="") as csvfile:

writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

writer.writeheader()
3 changes: 3 additions & 0 deletions conf/base.config
@@ -29,6 +29,9 @@ params{
// Scheme version
schemeVersion = 'V3'

// Nextclade dataset name
nextcladeData = 'sars-cov-2-21L'

// Run experimental medaka pipeline? Specify in the command using "--medaka"
medaka = false

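The new `nextcladeData` parameter reflects that Nextclade 2.x is typically run against a named dataset rather than individually supplied reference files. A hedged sketch of how such a parameter could be consumed — the process name, channels, and exact wiring below are illustrative, not the pipeline's actual module:

```nextflow
// Illustrative sketch only; the pipeline's real Nextclade module may differ.
process nextcladeSketch {
    input:
    tuple val(sampleName), path(consensus_fasta)

    output:
    tuple val(sampleName), path("${sampleName}.nextclade.tsv")

    script:
    """
    nextclade dataset get --name '${params.nextcladeData}' --output-dir nextclade_dataset
    nextclade run --input-dataset nextclade_dataset \\
        --output-tsv ${sampleName}.nextclade.tsv \\
        ${consensus_fasta}
    """
}
```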
8 changes: 3 additions & 5 deletions environments/illumina/Singularity
@@ -1,5 +1,5 @@
Bootstrap: docker
From: continuumio/miniconda3:latest
From: condaforge/mambaforge:latest
Stage: condabuild

%files
@@ -11,8 +11,7 @@ authors="Matt Bull"
description="Docker image containing all requirements for the ARTIC project's ncov2019 pipeline"

%post
/opt/conda/bin/conda install mamba -c conda-forge && \
/opt/conda/bin/mamba env create -f /environment.yml #&& \
/opt/conda/bin/mamba env create -f /environment.yml && \
/opt/conda/bin/mamba env update -f /extras.yml -n artic-ncov2019-illumina


Expand All @@ -25,9 +24,8 @@ export PATH=/opt/conda/envs/artic-ncov2019-illumina/bin:$PATH
export LC_ALL=C.UTF-8
export LANG=C.UTF-8


%files from condabuild
/opt/conda/envs/artic-ncov2019-illumina /opt/conda/envs/artic-ncov2019-illumina
/opt/conda/envs/artic-ncov2019-illumina /opt/conda/envs/artic-ncov2019-illumina

%post
apt-get update
2 changes: 1 addition & 1 deletion environments/illumina/environment.yml
@@ -27,7 +27,7 @@ dependencies:
- fastqc=0.11.9
- rich=12.6.0
- multiqc=1.11
- nextclade=1.10.2
- nextclade=2.11
- sambamba=0.8.0
- ensembl-vep>=102.0
- conda-forge::r-base
1 change: 0 additions & 1 deletion environments/nanopore/Dockerfile
@@ -16,4 +16,3 @@ COPY --from=condabuild /opt/conda/envs/artic /opt/conda/envs/artic
ENV PATH=/opt/conda/envs/artic/bin:$PATH
ENV LC_ALL C.UTF-8
ENV LANG C.UTF-8
ENTRYPOINT ["/opt/conda/envs/artic/bin/artic"]
6 changes: 3 additions & 3 deletions environments/nanopore/Singularity
@@ -1,6 +1,7 @@
Bootstrap: docker
From: continuumio/miniconda3:latest
From: condaforge/mambaforge:latest
Stage: condabuild

%files
environments/nanopore/environment.yml /environment.yml
environments/extras.yml /extras.yml
@@ -10,7 +11,6 @@ authors="Matt Bull"
description="Docker image containing all requirements for the ARTIC project's ncov2019 pipeline"

%post
/opt/conda/bin/conda install mamba -c conda-forge && \
/opt/conda/bin/mamba env create -f /environment.yml #&& \
/opt/conda/bin/mamba env update -f /extras.yml -n artic

@@ -24,7 +24,7 @@ apt-get install -y git procps && \
apt-get clean -y

%files from condabuild
/opt/conda/envs/artic /opt/conda/envs/artic
/opt/conda/envs/artic /opt/conda/envs/artic

%environment
export PATH=/opt/conda/envs/artic/bin:$PATH
2 changes: 1 addition & 1 deletion environments/nanopore/environment.yml
@@ -12,7 +12,7 @@ dependencies:
- usher=0.2.0
- snakemake-minimal=5.13
- minimap2=2.17
- nextclade=1.10.2
- nextclade=2.11
- fastqc=0.11.9
- rich=12.6.0
- multiqc=1.11
2 changes: 1 addition & 1 deletion main.nf
@@ -1,7 +1,7 @@
#!/usr/bin/env nextflow

// enable dsl2
nextflow.preview.dsl = 2
nextflow.enable.dsl=2

// include modules
include {containerupdate} from './modules/containerupdate.nf'
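Recent Nextflow releases, including the 23.04.3 now pinned in CI, no longer accept the old `nextflow.preview.dsl` flag, so the stable `nextflow.enable.dsl=2` form is required. A minimal, self-contained sketch of the new syntax — the stand-in process is purely illustrative:

```nextflow
#!/usr/bin/env nextflow

// DSL2 is enabled with the stable flag; `nextflow.preview.dsl = 2` is rejected by recent Nextflow.
nextflow.enable.dsl = 2

// Stand-in process, for illustration only.
process sayHello {
    output:
    stdout

    script:
    """
    echo 'DSL2 enabled'
    """
}

workflow {
    sayHello() | view
}
```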
4 changes: 2 additions & 2 deletions modules/analysis.nf
@@ -15,8 +15,8 @@ process makeReport {
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", mode: 'copy', pattern: "${sampleName}_report.tsv"

input:
tuple(sampleName, path('pangolinTyping.csv'), path('nextclade_tree.json'), path('nextclade.tsv'),
path('nextclade.json'))
tuple val(sampleName), path('pangolinTyping.csv'), path('nextclade_tree.json'), path('nextclade.tsv'),
path('nextclade.json')

output:
path "${sampleName}_report.tsv", emit: tsv
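Under DSL2, every element of a `tuple` input or output needs an explicit qualifier such as `val()` or `path()`; the bare DSL1-style `tuple(sampleName, path(...))` form is what this file and `modules/artic.nf` move away from. A module-style sketch of the required syntax — the command in the script block is a placeholder only:

```nextflow
// Sketch of DSL2 tuple qualifiers; the script body is illustrative.
process exampleReport {
    input:
    tuple val(sampleName), path(typing_csv)   // DSL2: each tuple element carries val()/path()

    output:
    path "${sampleName}_report.tsv"

    script:
    """
    cp ${typing_csv} ${sampleName}_report.tsv
    """
}
```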
20 changes: 10 additions & 10 deletions modules/artic.nf
@@ -54,10 +54,10 @@ process articMinIONMedaka {
output:
file("${sampleName}*")

tuple sampleName, file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple sampleName, file("${sampleName}.sorted.bam"), emit: mapped
tuple sampleName, file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple sampleName, file("${sampleName}.pass.vcf.gz"), emit: vcf
tuple val(sampleName), file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple val(sampleName), file("${sampleName}.sorted.bam"), emit: mapped
tuple val(sampleName), file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple val(sampleName), file("${sampleName}.pass.vcf.gz"), emit: vcf

script:
// Make an identifier from the fastq filename
@@ -102,10 +102,10 @@ process articMinIONNanopolish {
output:
file("${sampleName}*")

tuple sampleName, file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple sampleName, file("${sampleName}.sorted.bam"), emit: mapped
tuple sampleName, file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple sampleName, file("${sampleName}.pass.vcf.gz"), emit: vcf
tuple val(sampleName), file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple val(sampleName), file("${sampleName}.sorted.bam"), emit: mapped
tuple val(sampleName), file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple val(sampleName), file("${sampleName}.pass.vcf.gz"), emit: vcf

script:
// Make an identifier from the fastq filename
@@ -144,10 +144,10 @@ process articRemoveUnmappedReads {
cpus 1

input:
tuple(sampleName, path(bamfile))
tuple val(sampleName), path(bamfile)

output:
tuple( sampleName, file("${sampleName}.mapped.sorted.bam"))
tuple val(sampleName), file("${sampleName}.mapped.sorted.bam")

script:
"""