Skip to content

Commit

Permalink
Merge pull request #405 from jodyphelan/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jodyphelan authored Nov 18, 2024
2 parents 070877f + bdbaed0 commit 31440f7
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 73 deletions.
25 changes: 22 additions & 3 deletions tb-profiler
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,20 @@ def create_output_directories(args,directories=["bam","vcf","results"]):
if pp.nofolder(args.dir+"/"+d):
os.mkdir(args.dir+"/"+d)

def plugin_decorator(func):
    """Wrap a profiling entry point with plugin pre- and post-processing hooks.

    Every direct subclass of ``tbp.ProfilePlugin`` is instantiated and its
    ``pre_process(args)`` is called before ``func`` runs; afterwards each
    subclass is instantiated again and its ``post_process(args)`` is called.

    Args:
        func: Callable taking the parsed argument namespace as its only
            argument.

    Returns:
        A wrapper with the same single-argument signature that returns
        whatever ``func`` returns.
    """
    # Local import so the block does not depend on a file-level import.
    import functools

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(args):
        for plugin in tbp.ProfilePlugin.__subclasses__():
            logging.debug("Running pre process for %s", plugin)
            plugin().pre_process(args)
        # Propagate the result instead of silently discarding it.
        result = func(args)
        # The subclass list is re-read on purpose: a fresh plugin instance is
        # used for each hook, exactly as before.
        for plugin in tbp.ProfilePlugin.__subclasses__():
            logging.debug("Running post process for %s", plugin)
            plugin().post_process(args)
        return result

    return wrapper

@plugin_decorator
def main_profile(args):

pp.process_args(args)
create_output_directories(args)

Expand Down Expand Up @@ -163,8 +176,8 @@ def main_profile(args):
tbp.run_snp_dists(args,result)
tbp.update_neighbour_snp_dist_output(args,result)

if args.update_phylo:
tbp.phylo.usher_add_sample(args)
for plugin in tbp.ProfilePlugin.__subclasses__():
plugin().run(args)


### Create folders for results if they don't exist ###
Expand Down Expand Up @@ -413,7 +426,7 @@ algorithm.add_argument('--kmer_counter','--kmer-counter',default='kmc',choices=[
# Help text describes this option itself rather than the kmer counter option.
algorithm.add_argument('--coverage_tool','--coverage-tool',default='samtools',choices=["samtools","bedtools"],type=str,help="Coverage tool")
algorithm.add_argument('--suspect',action="store_true",help="Use the suspect suite of tools to add ML predictions")
algorithm.add_argument('--spoligotype',action="store_true",help="Perform in-silico spoligotyping")
algorithm.add_argument('--update_phylo','--update-phylo',action="store_true",help="Update phylogeny using usher (experimental feature)")
# algorithm.add_argument('--update_phylo','--update-phylo',action="store_true",help="Update phylogeny using usher (experimental feature)")
algorithm.add_argument('--call_whole_genome','--call-whole-genome',action="store_true",help="Call variant across the whole genome")
algorithm.add_argument('--snp_dist','--snp-dist',type=int,help="Store variant set and get all samples with snp distance less than this cutoff (experimental feature)")
algorithm.add_argument('--snp_diff_db','--snp-diff_db',type=str,help=argparse.SUPPRESS)
Expand All @@ -428,6 +441,12 @@ algorithm.add_argument('--threads','-t',default=1,help='Threads to use',type=int
algorithm.add_argument('--ram',default=2,help='Maximum memory to use',type=int)
algorithm.add_argument('--implement_rules','--implement-rules',action="store_true",help='Use rules implemented in the resistance library (by default only a note will be made)')

# Collect command-line options contributed by plugins into their own group.
plugins = parser_sub.add_argument_group("Plugin options")
for cls in tbp.ProfilePlugin.__subclasses__():
    # A plugin may declare ``__cli_params__``: an iterable of dicts whose
    # 'args' and 'kwargs' entries are forwarded to ``add_argument``.
    # Plugins without the attribute contribute nothing.
    for a in getattr(cls, "__cli_params__", ()):
        plugins.add_argument(*a['args'], **a['kwargs'])

other=parser_sub.add_argument_group("Other options")
other.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
other.add_argument('--debug',action='store_true',help=argparse.SUPPRESS)
Expand Down
25 changes: 23 additions & 2 deletions tbprofiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,28 @@
from .spoligotyping import *
from .output import *
from .snp_dists import *
from .phylo import *
from .docx import *
from abc import ABC, abstractmethod

__version__ = "6.4.0"
__version__ = "6.4.1"


class ProfilePlugin:
    """
    A class to define a plugin for tbprofiler

    Subclasses override any of the three hooks below. Each hook receives the
    parsed argument namespace and the base implementations do nothing.

    NOTE: the class does not derive from ``abc.ABC``, so the
    ``@abstractmethod`` markers are not enforced: the class and its
    subclasses can be instantiated without overriding every hook.
    """

    @abstractmethod
    def pre_process(self, args):
        """Generic pre-check method"""
        pass

    @abstractmethod
    def run(self, args=None):
        """Generic run method

        ``args`` is accepted because the hook is invoked with the argument
        namespace; it defaults to ``None`` so a call without arguments
        still works.
        """
        pass

    @abstractmethod
    def post_process(self, args):
        """Generic post-check method"""
        pass
69 changes: 5 additions & 64 deletions tbprofiler/phylo.py → tbprofiler/consensus.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,9 @@
import logging
import filelock
import os
from tqdm import tqdm
from pathogenprofiler.utils import run_cmd,cmd_out
import pysam
from joblib import Parallel, delayed
from uuid import uuid4
from pathogenprofiler import run_cmd, cmd_out
import argparse
import os
from uuid import uuid4

def usher_add_sample(args: argparse.Namespace) -> None:
    """Place the current sample onto the existing usher phylogeny.

    Sets ``wg_vcf``, ``tmp_masked_vcf``, ``input_phylo``, ``tmp_output_phylo``
    and ``output_nwk`` on ``args``, builds the consensus VCF for the sample,
    then, while holding a soft file lock on ``<input_phylo>.lock``, runs
    ``usher`` from inside ``args.temp`` and replaces the stored phylogeny
    with the updated one.

    Args:
        args: Parsed argument namespace. Reads ``vcf``, ``files_prefix``,
            ``dir``, ``prefix`` and ``temp``.

    Raises:
        SystemExit: If ``<args.dir>/results/phylo.pb`` does not exist.
    """
    # Local import so the block does not depend on a file-level import.
    import sys

    logging.info("Adding sample to phylogeny")

    if args.vcf:
        args.wg_vcf = args.vcf
    else:
        args.wg_vcf = args.files_prefix + ".vcf.gz"

    args.tmp_masked_vcf = f"{args.files_prefix}.masked.vcf.gz"
    args.input_phylo = f"{args.dir}/results/phylo.pb"
    args.tmp_output_phylo = f"{args.files_prefix}.pb"
    args.output_nwk = f"{args.files_prefix}.nwk"

    if not os.path.isfile(args.input_phylo):
        logging.error("Phylogeny doesn't exist. Please create one first with `tb-profiler-tools`")
        # sys.exit raises the same SystemExit as quit(), but quit() is only
        # installed by the ``site`` module and may be absent.
        sys.exit("Exiting!")

    lock = filelock.SoftFileLock(args.input_phylo + ".lock")

    cwd = os.getcwd()
    args.tmp_masked_vcf = get_consensus_vcf(args.prefix, args.wg_vcf, args)
    with lock:
        os.chdir(args.temp)
        try:
            run_cmd("usher --vcf %(tmp_masked_vcf)s --load-mutation-annotated-tree %(input_phylo)s --save-mutation-annotated-tree %(tmp_output_phylo)s --write-uncondensed-final-tree" % vars(args))
            run_cmd("mv uncondensed-final-tree.nh %(output_nwk)s" % vars(args))
            # Remove by-products that usher may leave in the working directory.
            for f in ["mutation-paths.txt", "placement_stats.tsv"]:
                if os.path.exists(f):
                    os.remove(f)
            run_cmd("mv %(tmp_output_phylo)s %(input_phylo)s " % vars(args))
        finally:
            # Always return to the caller's directory, even if a command fails.
            os.chdir(cwd)

def generate_low_dp_mask(bam: str,ref: str,outfile: str,min_dp: int = 10) -> None:
missing_positions = []
Expand All @@ -60,6 +23,8 @@ def generate_low_dp_mask(bam: str,ref: str,outfile: str,min_dp: int = 10) -> Non
for x in missing_positions:
O.write(f"{x[0]}\t{x[1]}\t{x[1]+1}\n")



def generate_low_dp_mask_vcf(vcf: str,outfile: str,min_dp: int = 10) -> None:
missing_positions = []
vcf_obj = pysam.VariantFile(vcf)
Expand All @@ -77,9 +42,6 @@ def generate_low_dp_mask_vcf(vcf: str,outfile: str,min_dp: int = 10) -> None:
for x in missing_positions:
O.write(f"{x[0]}\t{x[1]}\t{x[1]+1}\n")

def prepare_usher(treefile: str, vcf_file: str) -> None:
    """Run ``usher`` on a tree and VCF, saving the annotated tree to ``phylo.pb``.

    Args:
        treefile: Path passed to usher's ``--tree`` option.
        vcf_file: Path passed to usher's ``--vcf`` option.
    """
    usher_cmd = f"usher --tree {treefile} --vcf {vcf_file} --collapse-tree --save-mutation-annotated-tree phylo.pb"
    run_cmd(usher_cmd)

def prepare_sample_consensus(sample: str,input_vcf: str,args: argparse.Namespace) -> str:
s = sample
tmp_vcf = f"{args.files_prefix}.{s}.vcf.gz"
Expand Down Expand Up @@ -112,24 +74,3 @@ def get_consensus_vcf(sample: str,input_vcf: str,args: argparse.Namespace) -> st
os.remove(tmp_aln)
return outfile

def wrapper_function(s: str, args: argparse.Namespace) -> str:
    """Prepare the consensus for sample ``s`` using its stored BAM and VCF.

    Sets ``args.bam`` to ``<args.dir>/bam/<s>.bam`` and returns the result of
    ``prepare_sample_consensus`` called with ``<args.dir>/vcf/<s>.vcf.gz``.
    """
    sample_vcf = f"{args.dir}/vcf/{s}.vcf.gz"
    args.bam = f"{args.dir}/bam/{s}.bam"
    return prepare_sample_consensus(s, sample_vcf, args)

def calculate_phylogeny(args: argparse.Namespace) -> None:
    """Build a phylogeny for the samples listed in ``args.samples``.

    Steps, in order:
      1. Read sample names (one per line) from ``args.samples``.
      2. Generate a consensus file per sample in parallel
         (``args.threads`` jobs).
      3. Concatenate the consensus files into ``<files_prefix>.aln`` and
         write a second alignment with the reference (``args.conf['ref']``)
         prepended.
      4. Convert the reference-plus-samples alignment to VCF with
         ``faToVcf`` and run ``iqtree`` on the sample alignment.
      5. Run ``prepare_usher`` on the tree and VCF, then move the resulting
         ``phylo.pb`` into ``<args.dir>/results/``.

    Args:
        args: Parsed argument namespace. Reads ``samples``, ``files_prefix``,
            ``threads``, ``conf`` and ``dir``; ``tmp_masked_vcf`` is set on it.
    """
    # Close the samples file deterministically and skip blank lines, which
    # would otherwise become empty sample names.
    with open(args.samples) as sample_fh:
        samples = [line.strip() for line in sample_fh if line.strip()]
    args.tmp_masked_vcf = f"{args.files_prefix}.masked.vcf.gz"

    alignment_file = f"{args.files_prefix}.aln"
    consensus_jobs = Parallel(n_jobs=args.threads, return_as='generator')(
        delayed(wrapper_function)(s, args) for s in samples
    )
    consensus_files = list(
        tqdm(consensus_jobs, desc="Generating consensus sequences", total=len(samples))
    )

    run_cmd(f"cat {' '.join(consensus_files)} > {alignment_file}")
    alignment_file_plus_ref = f"{args.files_prefix}.aln.plus_ref"
    run_cmd(f"cat {args.conf['ref']} > {alignment_file_plus_ref}")
    run_cmd(f"cat {alignment_file} >> {alignment_file_plus_ref}")
    tmp_vcf = f"{args.files_prefix}.vcf"
    run_cmd(f"faToVcf {alignment_file_plus_ref} {tmp_vcf}")
    run_cmd(f"iqtree -s {alignment_file} -m GTR+G -nt AUTO", desc="Running IQTree")
    prepare_usher(f"{alignment_file}.treefile", tmp_vcf)
    run_cmd(f"mv phylo.pb {args.dir}/results/")
    os.remove("condensed-tree.nh")
2 changes: 1 addition & 1 deletion tbprofiler/snp_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import filelock
import sqlite3
from tqdm import tqdm
from .phylo import get_consensus_vcf
from .consensus import get_consensus_vcf
import argparse
from .models import ProfileResult, LinkedSample
from typing import List, Tuple
Expand Down
4 changes: 1 addition & 3 deletions tbprofiler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@
import re

def process_tb_profiler_args(args: argparse.Namespace) -> None:
    """Derive dependent settings on ``args`` in place.

    - A truthy ``snp_dist`` turns on ``call_whole_genome``.
    - ``call_lineage`` is the negation of ``no_lineage``.
    - ``spoligotype`` is turned off when a VCF is supplied as input.

    ``args.update_phylo`` is deliberately not read: its command-line option
    is no longer registered, so accessing it would raise AttributeError.

    Args:
        args: Parsed argument namespace. Reads ``snp_dist``, ``no_lineage``,
            ``vcf`` and ``spoligotype``.
    """
    if args.snp_dist:
        args.call_whole_genome = True
    args.call_lineage = not args.no_lineage
    if args.vcf and args.spoligotype:
        args.spoligotype = False

def get_vcf_samples(vcf_file):
vcf = Vcf(vcf_file)
Expand Down

0 comments on commit 31440f7

Please sign in to comment.