Commit

Update nextclade to 2.11 (#74)
sofstam authored Sep 22, 2023
1 parent 94d179e commit 0c599b9
Showing 24 changed files with 85 additions and 107 deletions.
8 changes: 4 additions & 4 deletions .github/scripts/install_singularity.sh
@@ -20,11 +20,11 @@ echo $(which go)
go version

# install Singularity
export VERSION=3.7.3
export VERSION=3.11.4
echo Install Singularity version $VERSION .. >> artifacts/test_artifact.log
wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-${VERSION}.tar.gz
tar -xzf singularity-${VERSION}.tar.gz
cd singularity
wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-ce-${VERSION}.tar.gz
tar -xzf singularity-ce-${VERSION}.tar.gz
cd singularity-ce-${VERSION}
./mconfig
make -C builddir
sudo make -C builddir install
8 changes: 4 additions & 4 deletions .github/scripts/test_nanopore_pipelines.sh
@@ -7,7 +7,7 @@ export PATH=/opt/conda/bin:$PATH
singularity --version
# write test log as github Action artifact
echo "Nextflow run current PR in --nanopolish mode (no barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--nanopolish --prefix "test_nanopore" \
--basecalled_fastq .github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/fastq_pass/ \
@@ -17,7 +17,7 @@ cp .nextflow.log artifacts/nanopolish.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --nanopolish mode (barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--nanopolish --prefix "20200311_1427_X1_FAK72834_a3787181" \
--basecalled_fastq .github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/fastq_pass/ \
@@ -27,7 +27,7 @@ cp .nextflow.log artifacts/nanopolish_barcodes.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --medaka mode (no barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--medaka \
--basecalled_fastq .github/data/nanopore/20200311_1427_X4_FAK72834_a3787181/fastq_pass/ \
@@ -36,7 +36,7 @@ cp .nextflow.log artifacts/medaka.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --medaka mode (barcodes).." >> artifacts/test_artifact.log
NXF_VER=21.04.0 nextflow run main.nf \
NXF_VER=23.04.3 nextflow run main.nf \
-profile singularity \
--medaka \
--basecalled_fastq .github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/fastq_pass/ \
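All of the test commands above assume a `singularity` profile and now pin Nextflow 23.04.3 via `NXF_VER`. As a rough sketch only — the real profile lives in the repository's Nextflow configuration, which is not part of this diff — such a profile typically looks like this:

```nextflow
// Hypothetical sketch of a Singularity profile; the actual settings are in the repo's nextflow.config.
profiles {
    singularity {
        singularity.enabled    = true   // run each process inside a Singularity container
        singularity.autoMounts = true   // automatically bind host paths into the container
    }
}
```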
2 changes: 1 addition & 1 deletion .github/workflows/pull_request.yml
@@ -29,7 +29,7 @@ jobs:
run: |
export PATH=/opt/conda/bin:$PATH
conda install -c bioconda nextflow
NXF_VER=21.04.0 nextflow -version
NXF_VER=23.04.3 nextflow -version
- name: test nanopore pipelines
run: bash .github/scripts/test_nanopore_pipelines.sh
- name: test typing functionality
3 changes: 2 additions & 1 deletion README.md
@@ -1,4 +1,5 @@
[![Update docker for artic-ncov2019-illumina, artic-ncov2019-nanopore](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile.yml/badge.svg)](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile.yml)
[![Update docker for gms-artic-illumina, gms-artic-nanopore](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_master.yml/badge.svg)](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_master.yml) [![Update docker for gms-artic-pangolin
](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_pangolin_master.yml/badge.svg)](https://github.com/genomic-medicine-sweden/gms-artic/actions/workflows/build_dockerfile_pangolin_master.yml)

![logo](workflow-image/logo.png)

2 changes: 1 addition & 1 deletion bin/get_versions.py
@@ -41,5 +41,5 @@
contents = f.read()
match = re.search(regex, contents)
if match:
# Add version number to output
# Add version number to output file
out.write("{},{}\n".format(tool, match.group(1)))
5 changes: 1 addition & 4 deletions bin/process_gvcf.py
@@ -7,6 +7,7 @@
import os
from collections import defaultdict


# from https://www.geeksforgeeks.org/python-make-a-list-of-intervals-with-sequential-numbers/
# via artic-mask
def intervals_extract(iterable):
@@ -119,7 +120,6 @@ def handle_sub(vcf_header, record):

# construct output records
for i in range(0, sub_length):

# choose base with highest frequency, skipping the reference
max_b = base_max(base_frequency[i], record.ref[i])
if max_b is None:
@@ -137,7 +137,6 @@ def handle_sub(vcf_header, record):


def main():

description = "Process a .gvcf file to create a file of consensus variants, low-frequency variants and a coverage mask"
parser = argparse.ArgumentParser(description=description)

@@ -221,7 +220,6 @@ def main():
)

for record in vcf:

is_gvcf_ref = record.alts[0] == "<*>"

# set depth for this part of the genome
@@ -261,7 +259,6 @@
# classify variants using VAF cutoffs for IUPAC ambiguity codes, etc
accept_variant = False
for out_r in out_records:

# at this point we should have resolved multi-allelic variants
assert len(out_r.alts) == 1

4 changes: 0 additions & 4 deletions bin/qc.py
@@ -80,7 +80,6 @@ def get_N_positions(fasta):


def get_pct_N_bases(fasta):

count_N = len(get_N_positions(fasta))

pct_N_bases = count_N / len(fasta.seq) * 100
@@ -104,7 +103,6 @@ def get_ref_length(ref):


def sliding_window_N_density(sequence, window=10):

sliding_window_n_density = []
for i in range(0, len(sequence.seq), 1):
window_mid = i + (window / 2)
@@ -118,7 +116,6 @@ def sliding_window_N_density(sequence, window=10):


def get_num_reads(bamfile):

st_filter = "0x900"
command = "samtools view -c -F{} {}".format(st_filter, bamfile)
what = shlex.split(command)
@@ -151,7 +148,6 @@ def go(args):
qc_pass = "FALSE"

if len(fasta.seq) != 0:

pct_N_bases = get_pct_N_bases(fasta)
largest_N_gap = get_largest_N_gap(fasta)

16 changes: 0 additions & 16 deletions bin/type_vcf.py
@@ -210,7 +210,6 @@ def csq_annotate_vcf_string(vcfString, RefIn, GffIn):


def extract_csq_info_from_vcf_string(csqVcf, minAF, minDP):

v = io.StringIO(csqVcf)

vcf_reader = vcf.Reader(v)
@@ -235,7 +234,6 @@ def extract_csq_info_from_vcf_string(csqVcf, minAF, minDP):
vcf_type = None

for record in vcf_reader:

if vcf_type == "nanopolish":
if record.FILTER:
continue
@@ -260,7 +258,6 @@ def extract_csq_info_from_vcf_string(csqVcf, minAF, minDP):
continue

if vcf_type == "medaka":

if record.FILTER:
continue

@@ -289,11 +286,9 @@


def get_variant_summary(info):

sample_vars = []

for variant in info:

aa_r = re.compile(
"(?P<refpos>[0-9]+)(?P<refaa>[A-Z\*]+)*>*(?P<varpos>[0-9]+)*(?P<varaa>[A-Z\*]+)"
)
@@ -308,9 +303,7 @@ def get_variant_summary(info):
"synonymous" not in variant["consequence"]
and "stop_retained" not in variant["consequence"]
):

if "frameshift" in variant["consequence"]:

complete_aa_variant_string = (
"Frameshift." + aa_var["refpos"] + aa_var["varaa"]
)
@@ -345,7 +338,6 @@ def read_types_yaml(inFile):


def type_vars_in_sample(types, sample_vars):

types_assigned = []

for typename, data in types.items():
@@ -357,14 +349,10 @@

# {gene, [var, var]}
for gene in data["variants"]:

# { gene: gene, aa_var: var, dna_var: var }
for sample_variant in sample_vars:

if gene in sample_variant["gene"]:

if sample_variant["aa_var"] in data["variants"][gene]:

data["variants"][gene].remove(sample_variant["aa_var"])
additional_vars_from_type.remove(sample_variant)

@@ -405,13 +393,11 @@ def type_vars_in_sample(types, sample_vars):


def write_types_to_csv(types_assigned, sampleID, csvFileOut):

fieldnames = list(types_assigned[0].keys())

fieldnames.insert(0, "sampleID")

with open(csvFileOut, "w", newline="") as csvfile:

writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

writer.writeheader()
@@ -421,7 +407,6 @@ def write_types_to_csv(types_assigned, sampleID, csvFileOut):


def read_vcf_to_vcf_string(FileIn):

if FileIn.endswith(".gz"):
with gzip.open(FileIn, "rt") as f:
vcfString = f.read()
@@ -444,7 +429,6 @@ def write_sample_vars_to_csv(summaryCsvOut, sampleID, sampleVars):
fieldnames.insert(0, "sampleID")

with open(summaryCsvOut, "w", newline="") as csvfile:

writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

writer.writeheader()
3 changes: 3 additions & 0 deletions conf/base.config
@@ -29,6 +29,9 @@ params{
// Scheme version
schemeVersion = 'V3'

// Nextclade dataset name
nextcladeData = 'sars-cov-2-21L'

// Run experimental medaka pipeline? Specify in the command using "--medaka"
medaka = false

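The new `nextcladeData` parameter reflects that Nextclade 2.x is typically run against a named dataset rather than individually supplied reference files. A hedged sketch of how such a parameter could be consumed — the process name, channels, and exact wiring below are illustrative, not the pipeline's actual module:

```nextflow
// Illustrative sketch only; the pipeline's real Nextclade module may differ.
process nextcladeSketch {
    input:
    tuple val(sampleName), path(consensus_fasta)

    output:
    tuple val(sampleName), path("${sampleName}.nextclade.tsv")

    script:
    """
    nextclade dataset get --name '${params.nextcladeData}' --output-dir nextclade_dataset
    nextclade run --input-dataset nextclade_dataset \\
        --output-tsv ${sampleName}.nextclade.tsv \\
        ${consensus_fasta}
    """
}
```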
8 changes: 3 additions & 5 deletions environments/illumina/Singularity
@@ -1,5 +1,5 @@
Bootstrap: docker
From: continuumio/miniconda3:latest
From: condaforge/mambaforge:latest
Stage: condabuild

%files
@@ -11,8 +11,7 @@ authors="Matt Bull"
description="Docker image containing all requirements for the ARTIC project's ncov2019 pipeline"

%post
/opt/conda/bin/conda install mamba -c conda-forge && \
/opt/conda/bin/mamba env create -f /environment.yml #&& \
/opt/conda/bin/mamba env create -f /environment.yml && \
/opt/conda/bin/mamba env update -f /extras.yml -n artic-ncov2019-illumina


Expand All @@ -25,9 +24,8 @@ export PATH=/opt/conda/envs/artic-ncov2019-illumina/bin:$PATH
export LC_ALL=C.UTF-8
export LANG=C.UTF-8


%files from condabuild
/opt/conda/envs/artic-ncov2019-illumina /opt/conda/envs/artic-ncov2019-illumina
/opt/conda/envs/artic-ncov2019-illumina /opt/conda/envs/artic-ncov2019-illumina

%post
apt-get update
2 changes: 1 addition & 1 deletion environments/illumina/environment.yml
@@ -27,7 +27,7 @@ dependencies:
- fastqc=0.11.9
- rich=12.6.0
- multiqc=1.11
- nextclade=1.10.2
- nextclade=2.11
- sambamba=0.8.0
- ensembl-vep>=102.0
- conda-forge::r-base
1 change: 0 additions & 1 deletion environments/nanopore/Dockerfile
@@ -16,4 +16,3 @@ COPY --from=condabuild /opt/conda/envs/artic /opt/conda/envs/artic
ENV PATH=/opt/conda/envs/artic/bin:$PATH
ENV LC_ALL C.UTF-8
ENV LANG C.UTF-8
ENTRYPOINT ["/opt/conda/envs/artic/bin/artic"]
6 changes: 3 additions & 3 deletions environments/nanopore/Singularity
@@ -1,6 +1,7 @@
Bootstrap: docker
From: continuumio/miniconda3:latest
From: condaforge/mambaforge:latest
Stage: condabuild

%files
environments/nanopore/environment.yml /environment.yml
environments/extras.yml /extras.yml
@@ -10,7 +11,6 @@ authors="Matt Bull"
description="Docker image containing all requirements for the ARTIC project's ncov2019 pipeline"

%post
/opt/conda/bin/conda install mamba -c conda-forge && \
/opt/conda/bin/mamba env create -f /environment.yml #&& \
/opt/conda/bin/mamba env update -f /extras.yml -n artic

@@ -24,7 +24,7 @@ apt-get install -y git procps && \
apt-get clean -y

%files from condabuild
/opt/conda/envs/artic /opt/conda/envs/artic
/opt/conda/envs/artic /opt/conda/envs/artic

%environment
export PATH=/opt/conda/envs/artic/bin:$PATH
2 changes: 1 addition & 1 deletion environments/nanopore/environment.yml
@@ -12,7 +12,7 @@ dependencies:
- usher=0.2.0
- snakemake-minimal=5.13
- minimap2=2.17
- nextclade=1.10.2
- nextclade=2.11
- fastqc=0.11.9
- rich=12.6.0
- multiqc=1.11
2 changes: 1 addition & 1 deletion main.nf
@@ -1,7 +1,7 @@
#!/usr/bin/env nextflow

// enable dsl2
nextflow.preview.dsl = 2
nextflow.enable.dsl=2

// include modules
include {containerupdate} from './modules/containerupdate.nf'
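Recent Nextflow releases, including the 23.04.3 now pinned in CI, no longer accept the old `nextflow.preview.dsl` flag, so the stable `nextflow.enable.dsl=2` form is required. A minimal, self-contained sketch of the new syntax — the stand-in process is purely illustrative:

```nextflow
#!/usr/bin/env nextflow

// DSL2 is enabled with the stable flag; `nextflow.preview.dsl = 2` is rejected by recent Nextflow.
nextflow.enable.dsl = 2

// Stand-in process, for illustration only.
process sayHello {
    output:
    stdout

    script:
    """
    echo 'DSL2 enabled'
    """
}

workflow {
    sayHello() | view
}
```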
4 changes: 2 additions & 2 deletions modules/analysis.nf
@@ -15,8 +15,8 @@ process makeReport {
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", mode: 'copy', pattern: "${sampleName}_report.tsv"

input:
tuple(sampleName, path('pangolinTyping.csv'), path('nextclade_tree.json'), path('nextclade.tsv'),
path('nextclade.json'))
tuple val(sampleName), path('pangolinTyping.csv'), path('nextclade_tree.json'), path('nextclade.tsv'),
path('nextclade.json')

output:
path "${sampleName}_report.tsv", emit: tsv
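Under DSL2, every element of a `tuple` input or output needs an explicit qualifier such as `val()` or `path()`; the bare DSL1-style `tuple(sampleName, path(...))` form is what this file and `modules/artic.nf` move away from. A module-style sketch of the required syntax — the command in the script block is a placeholder only:

```nextflow
// Sketch of DSL2 tuple qualifiers; the script body is illustrative.
process exampleReport {
    input:
    tuple val(sampleName), path(typing_csv)   // DSL2: each tuple element carries val()/path()

    output:
    path "${sampleName}_report.tsv"

    script:
    """
    cp ${typing_csv} ${sampleName}_report.tsv
    """
}
```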
20 changes: 10 additions & 10 deletions modules/artic.nf
@@ -54,10 +54,10 @@ process articMinIONMedaka {
output:
file("${sampleName}*")

tuple sampleName, file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple sampleName, file("${sampleName}.sorted.bam"), emit: mapped
tuple sampleName, file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple sampleName, file("${sampleName}.pass.vcf.gz"), emit: vcf
tuple val(sampleName), file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple val(sampleName), file("${sampleName}.sorted.bam"), emit: mapped
tuple val(sampleName), file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple val(sampleName), file("${sampleName}.pass.vcf.gz"), emit: vcf

script:
// Make an identifier from the fastq filename
@@ -102,10 +102,10 @@ process articMinIONNanopolish {
output:
file("${sampleName}*")

tuple sampleName, file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple sampleName, file("${sampleName}.sorted.bam"), emit: mapped
tuple sampleName, file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple sampleName, file("${sampleName}.pass.vcf.gz"), emit: vcf
tuple val(sampleName), file("${sampleName}.primertrimmed.rg.sorted.bam"), emit: ptrim
tuple val(sampleName), file("${sampleName}.sorted.bam"), emit: mapped
tuple val(sampleName), file("${sampleName}.consensus.fasta"), emit: consensus_fasta
tuple val(sampleName), file("${sampleName}.pass.vcf.gz"), emit: vcf

script:
// Make an identifier from the fastq filename
@@ -144,10 +144,10 @@ process articRemoveUnmappedReads {
cpus 1

input:
tuple(sampleName, path(bamfile))
tuple val(sampleName), path(bamfile)

output:
tuple( sampleName, file("${sampleName}.mapped.sorted.bam"))
tuple val(sampleName), file("${sampleName}.mapped.sorted.bam")

script:
"""