Skip to content

Commit

Permalink
Improve module and argument names
Browse files (browse the repository at this point in the history)
  • Loading branch information
SWittouck committed Mar 15, 2022
1 parent 2803bbe commit ebf37c5
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 190 deletions.
32 changes: 16 additions & 16 deletions src/scarap/module_wrappers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,7 +21,7 @@ def run_pan_withchecks(args):
logging.info("welcome to the pan task")

logging.info("checking arguments other than output folder")
check_fastas(args.faapaths)
check_fastas(args.faa_files)
if not args.species is None:
check_infile(args.species)

Expand All @@ -41,9 +41,9 @@ def run_build_withchecks(args):
logging.info("welcome to the build task")

logging.info("checking arguments other than output folder")
check_fastas(args.faapaths)
check_fastas(args.faa_files)
check_infile(args.pangenome)
faapaths = read_fastapaths(args.faapaths)
faapaths = read_fastapaths(args.faa_files)
args.core_prefilter = correct_freq(args.core_prefilter, "core prefilter")
args.core_filter = correct_freq(args.core_filter, "core filter")

Expand All @@ -58,7 +58,7 @@ def run_search_withchecks(args):
logging.info("welcome to the search task")

logging.info("checking arguments other than output folder")
check_fastas(args.qpaths)
check_fastas(args.faa_files)
check_db(args.db)

logging.info("checking dependencies")
Expand Down Expand Up @@ -97,35 +97,35 @@ def run_filter_withchecks(args):

run_filter(args)

def run_supermatrix_withchecks(args):
def run_concat_withchecks(args):

logging.info("welcome to the supermatrix task")
logging.info("welcome to the concat task")

logging.info("checking arguments other than output folder")
check_fastas(args.faapaths)
check_fastas(args.faa_files)
check_infile(args.coregenome)
if not args.ffnpaths is None:
check_fastas(args.ffnpaths)
if not args.ffn_files is None:
check_fastas(args.ffn_files)
args.core_filter = correct_freq(args.core_filter, "core filter")

logging.info("checking dependencies")
check_mafft()

run_supermatrix(args)
run_concat(args)

def run_sample_withchecks(args):

logging.info("welcome to the sample task")

logging.info("checking arguments other than output folder")
check_fastas(args.fastapaths)
fastapaths = read_fastapaths(args.fastapaths)
check_fastas(args.fasta_files)
fastapaths = read_fastapaths(args.fasta_files)
n_genomes = len(fastapaths)
if args.max_genomes > n_genomes:
args.max_genomes = n_genomes
logging.info(f"max_genomes reduced to {args.max_genomes}, since "
"that's the total number of genomes")
check_infile(args.coregenome)
check_infile(args.pangenome)
if args.identity > 100:
logging.error("identity should be between 0 and 1")
sys.exit(1)
Expand Down Expand Up @@ -162,7 +162,7 @@ def run_fetch_withchecks(args):
logging.info("welcome to the fetch task")

logging.info("checking arguments other than output folder")
check_fastas(args.fastapaths)
check_fastas(args.fasta_files)
check_infile(args.genes)

run_fetch(args)
Expand All @@ -172,8 +172,8 @@ def run_core_withchecks(args):
logging.info("welcome to the core pipeline")

logging.info("checking arguments other than output folder")
check_fastas(args.faapaths)
fastapaths = read_fastapaths(args.faapaths)
check_fastas(args.faa_files)
fastapaths = read_fastapaths(args.faa_files)
n_genomes = len(fastapaths)
if args.seeds > n_genomes:
args.seeds = n_genomes
Expand Down
46 changes: 23 additions & 23 deletions src/scarap/modules.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,7 +29,7 @@ def run_pan_nonhier(args):
logging.info("existing pangenome detected - moving on")
return()

faafins = read_fastapaths(args.faapaths)
faafins = read_fastapaths(args.faa_files)

if args.method in ["O-B", "O-D"]:

Expand Down Expand Up @@ -68,7 +68,7 @@ def run_pan_hier(args):
speciesdict.setdefault(row.species, []).append(row.genome)

logging.info("processing faapaths")
faafins = read_fastapaths(args.faapaths)
faafins = read_fastapaths(args.faa_files)
genomedict = {}
for faafin in faafins:
genome = filename_from_path(faafin)
Expand All @@ -86,7 +86,7 @@ def run_pan_hier(args):
os.makedirs(dout, exist_ok = True)
faapathsfio = os.path.join(dout, "faapaths.txt")
write_lines(faapaths, faapathsfio)
run_pan_nonhier(Namespace(faapaths = faapathsfio, outfolder = dout,
run_pan_nonhier(Namespace(faa_files = faapathsfio, outfolder = dout,
threads = args.threads, method = args.method))
speciespanfio = os.path.join(dout, "pangenome.tsv")
speciespanfios.append(speciespanfio)
Expand All @@ -101,9 +101,8 @@ def run_pan_hier(args):
logging.info("started building metapangenome using representatives")
metapandio = os.path.join(args.outfolder, "metapangenome")
os.makedirs(metapandio, exist_ok = True)
run_pan_nonhier(Namespace(faapaths = reprpathsfio,
outfolder = metapandio, threads = args.threads,
method = args.method))
run_pan_nonhier(Namespace(faa_files = reprpathsfio, outfolder = metapandio,
threads = args.threads, method = args.method))

logging.info("inflating metapangenome with species pangenomes")
speciespans = [read_genes(panfin) for panfin in speciespanfios]
Expand All @@ -121,12 +120,12 @@ def run_pan_hier(args):

def run_build(args):

fin_faapaths = args.faapaths
fin_faapaths = args.faa_files
fin_pangenome = args.pangenome
dout = args.outfolder
core_prefilter = args.core_prefilter
core_filter = args.core_filter
max_cores = args.max_cores
max_cores = args.max_core_genes
threads = args.threads

# define output paths/folders
Expand Down Expand Up @@ -206,7 +205,7 @@ def run_build(args):

def run_search(args):

fin_qpaths = args.qpaths
fin_qpaths = args.faa_files
din_db = args.db
dout = args.outfolder
threads = args.threads
Expand Down Expand Up @@ -275,14 +274,14 @@ def run_filter(args):
pangenome = filter_groups(pangenome, orthogroups)
write_tsv(pangenome, os.path.join(args.outfolder, "pangenome.tsv"))

def run_supermatrix(args):
def run_concat(args):

fin_faapaths = args.faapaths
fin_faapaths = args.faa_files
fin_coregenome = args.coregenome
dout = args.outfolder
core_filter = args.core_filter
max_cores = args.max_cores
fin_ffnpaths = args.ffnpaths
max_cores = args.max_core_genes
fin_ffnpaths = args.ffn_files

sm_aas_fout = os.path.join(dout, "supermatrix_aas.fasta")
sm_nucs_fout = os.path.join(dout, "supermatrix_nucs.fasta")
Expand Down Expand Up @@ -407,7 +406,7 @@ def run_sample(args):
return()

logging.info("reading core genome")
core = read_genes(args.coregenome)
core = read_genes(args.pangenome)
fams = core["orthogroup"].unique()
genomes = core["genome"].unique()
logging.info(f"detected {len(fams)} orthogroups in "
Expand All @@ -428,7 +427,7 @@ def run_sample(args):
else:

logging.info("gathering sequences of orthogroups")
fins_faas = read_fastapaths(args.fastapaths)
fins_faas = read_fastapaths(args.fasta_files)
gather_orthogroup_sequences(core, fins_faas, dio_seqs)

logging.info("creating database for alignments")
Expand Down Expand Up @@ -568,25 +567,25 @@ def run_fetch(args):
os.makedirs(dout_seqs, exist_ok = True)

logging.info("reading genes")
genes = read_genes(args.genes)
genes = read_genes(args.pangenome)
fams = genes["orthogroup"].unique()
genomes = genes["genome"].unique()
logging.info(f"detected {len(fams)} orthogroups in "
f"{len(genomes)} genomes")

logging.info("gathering sequences of orthogroups")
fins_fastas = read_fastapaths(args.fastapaths)
fins_fastas = read_fastapaths(args.fasta_files)
gather_orthogroup_sequences(genes, fins_fastas, dout_seqs)

def run_core(args):

fin_faapaths = args.faapaths
fin_faapaths = args.faa_files
dout = args.outfolder
method = args.method
seeds = args.seeds
core_prefilter = args.core_prefilter
core_filter = args.core_filter
max_cores = args.max_cores
max_cores = args.max_core_genes
threads = args.threads

# define paths
Expand All @@ -611,15 +610,16 @@ def run_core(args):
write_tsv(pd.DataFrame({"path": fins_nonseeds}), fout_nonseedpaths)

logging.info("STEP 1 - inferring pangenome of seed genomes")
args_pan = Namespace(faapaths = fout_seedpaths, outfolder = dout_seedpan,
args_pan = Namespace(faa_files = fout_seedpaths, outfolder = dout_seedpan,
method = method, threads = threads)
run_pan(args_pan)

logging.info("STEP 2 - building database of seed core genes and searching "
"in seed faas")
args_build = Namespace(faapaths = fout_seedpaths, pangenome = fout_seedpan,
args_build = Namespace(faa_files = fout_seedpaths, pangenome = fout_seedpan,
outfolder = dout_seedcore, core_prefilter = core_prefilter,
core_filter = core_filter, max_cores = max_cores, threads = threads)
core_filter = core_filter, max_core_genes = max_cores,
threads = threads)
run_build(args_build)

if os.stat(fout_nonseedpaths).st_size == 0:
Expand All @@ -628,7 +628,7 @@ def run_core(args):
return()

logging.info("STEP 3 - identifying core orthogroups in non-seed genomes")
args_search = Namespace(qpaths = fout_nonseedpaths, db = dout_seedcore,
args_search = Namespace(faa_files = fout_nonseedpaths, db = dout_seedcore,
outfolder = dout, threads = threads)
run_search(args_search)

Expand Down
Loading

0 comments on commit ebf37c5

Please sign in to comment.