Skip to content

Commit

Permalink
Merge pull request #53 from martinghunt/expose_spades_careful_only_as…
Browse files Browse the repository at this point in the history
…semble

Expose spades careful, only-assembler
  • Loading branch information
martinghunt committed Mar 9, 2016
2 parents 0d6606d + c847bfb commit e3760e8
Show file tree
Hide file tree
Showing 16 changed files with 20,101 additions and 9 deletions.
27 changes: 20 additions & 7 deletions circlator/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ def __init__(self,
outdir,
threads=1,
spades_kmers=None,
careful=True,
only_assembler=True,
verbose=False,
spades_use_first_success=False,
):
Expand All @@ -23,6 +25,8 @@ def __init__(self,

self.verbose = verbose
self.threads = threads
self.careful = careful
self.only_assembler = only_assembler
self.spades = external_progs.make_and_check_prog('spades', verbose=self.verbose)
self.spades_kmers = self._build_spades_kmers(spades_kmers)
self.spades_use_first_success = spades_use_first_success
Expand All @@ -45,17 +49,26 @@ def _build_spades_kmers(self, kmers):
raise Error('Error getting list of kmers from:' + str(kmers))


def run_spades_once(self, kmer, outdir):
cmd = ' '.join([
def _make_spades_command(self, kmer, outdir):
cmd = [
self.spades.exe(),
'-s', self.reads,
'-k', str(kmer),
'--careful',
'--only-assembler',
'-t', str(self.threads),
'-o', outdir,
])
'-t', str(self.threads),
'-k', str(kmer),
]

if self.careful:
cmd.append('--careful')

if self.only_assembler:
cmd.append('--only-assembler')

return ' '.join(cmd)


def run_spades_once(self, kmer, outdir):
    '''Runs SPAdes a single time, using the given kmer and writing to outdir.

    The command line comes from _make_spades_command(). It is handed to
    common.syscall() with allow_fail=True, and whatever syscall() returns
    is returned unchanged to the caller.'''
    return common.syscall(
        self._make_spades_command(kmer, outdir),
        verbose=self.verbose,
        allow_fail=True,
    )


Expand Down
2 changes: 1 addition & 1 deletion circlator/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class Error (Exception): pass

version = '1.1.5'
version = '1.2.0'

def syscall(cmd, allow_fail=False, verbose=False):
if verbose:
Expand Down
4,953 changes: 4,953 additions & 0 deletions circlator/data/test_contigs.fa

Large diffs are not rendered by default.

10,001 changes: 10,001 additions & 0 deletions circlator/data/test_for_reads.fa

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions circlator/data/test_make_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
# This is the script that was used to generate the
# test data, which is used when running 'circlator test'
set -e
set -x

ref=test_ref.fa
for_reads=test_for_reads.fa
reads=test_reads.fq.gz
contigs=test_contigs.fa
# Scratch file, made unique with the PID of this shell.
tmp_for_reads="$for_reads.$$"

# set -e aborts on the first failing command, so make sure the scratch
# file does not get left behind in that case.
trap 'rm -f "$tmp_for_reads"' EXIT

# Make the random reference sequence and index it.
fastaq make_random_contigs --seed 42 1 300000 "$ref"
samtools faidx "$ref"

# Template for the reads: sequence "1" of the reference is requested twice
# and the two copies are joined under the single header ">1.twice".
echo ">1.twice" > "$tmp_for_reads"
samtools faidx "$ref" 1 1 | grep -v ">" >> "$tmp_for_reads"
fastaq to_fasta "$tmp_for_reads" "$for_reads"
rm "$tmp_for_reads"
fastaq to_perfect_reads --seed 42 "$for_reads" "$reads" 16000 1 20 8000


# The test contigs are two pieces of the reference: 500-148000 and
# 150000-299500.
samtools faidx "$ref" 1:500-148000 > "$contigs"
samtools faidx "$ref" 1:150000-299500 >> "$contigs"
Binary file added circlator/data/test_reads.fq.gz
Binary file not shown.
5,001 changes: 5,001 additions & 0 deletions circlator/data/test_ref.fa

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions circlator/data/test_ref.fa.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 300000 3 60 61
6 changes: 6 additions & 0 deletions circlator/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def __init__(
min_spades_circular_percent=95,
spades_kmers=None,
spades_use_first_success=False,
spades_careful=True,
spades_only_assembler=True,
ref_end_tolerance=15000,
qry_end_tolerance=1000,
verbose=False,
Expand All @@ -45,6 +47,8 @@ def __init__(
self.min_spades_circular_percent = min_spades_circular_percent
self.spades_kmers = spades_kmers
self.spades_use_first_success = spades_use_first_success
self.spades_careful = spades_careful
self.spades_only_assembler = spades_only_assembler
self.ref_end_tolerance = ref_end_tolerance
self.qry_end_tolerance = qry_end_tolerance
self.verbose = verbose
Expand Down Expand Up @@ -698,6 +702,8 @@ def _iterative_bridged_contig_pair_merge(self, outprefix):
reads_prefix + '.fasta',
assembler_dir,
threads=self.threads,
careful=self.spades_careful,
only_assembler=self.spades_only_assembler,
verbose=self.verbose,
spades_kmers=self.spades_kmers,
spades_use_first_success=self.spades_use_first_success,
Expand Down
6 changes: 6 additions & 0 deletions circlator/tasks/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def run():
# The SPAdes options are registered on their own argument group, so that
# --help lists them under "assemble options" (same pattern as merge_group).
assemble_group = parser.add_argument_group('assemble options')
assemble_group.add_argument('--assemble_spades_k', help='Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [%(default)s]', default='127,117,107,97,87,77', metavar='k1,k2,k3,...')
assemble_group.add_argument('--assemble_spades_use_first', action='store_true', help='Use the first successful SPAdes assembly. Default is to try all kmers and use the assembly with the largest N50')
assemble_group.add_argument('--assemble_not_careful', action='store_true', help='Do not use the --careful option with SPAdes (used by default)')
# The SPAdes flag is called --only-assembler; the help text must name it correctly.
assemble_group.add_argument('--assemble_not_only_assembler', action='store_true', help='Do not use the --only-assembler option with SPAdes (used by default)')

merge_group = parser.add_argument_group('merge options')
merge_group.add_argument('--merge_diagdiff', type=int, help='Nucmer diagdiff option [%(default)s]', metavar='INT', default=25)
Expand Down Expand Up @@ -143,6 +145,8 @@ def run():
filtered_reads,
assembly_dir,
threads=options.threads,
careful=not options.assemble_not_careful,
only_assembler=not options.assemble_not_only_assembler,
spades_kmers=options.assemble_spades_k,
spades_use_first_success=options.assemble_spades_use_first,
verbose=options.verbose
Expand Down Expand Up @@ -177,6 +181,8 @@ def run():
min_spades_circular_percent=options.merge_min_spades_circ_pc,
spades_kmers=options.assemble_spades_k,
spades_use_first_success=options.assemble_spades_use_first,
spades_careful=not options.assemble_not_careful,
spades_only_assembler=not options.assemble_not_only_assembler,
nucmer_breaklen=options.merge_breaklen,
ref_end_tolerance=options.merge_ref_end,
qry_end_tolerance=options.merge_reassemble_end,
Expand Down
4 changes: 4 additions & 0 deletions circlator/tasks/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ def run():
parser = argparse.ArgumentParser(
description = 'Assemble reads using SPAdes',
usage = 'circlator assemble [options] <in.reads.fasta> <out_dir>')
# Both options are negative switches: SPAdes gets --careful and
# --only-assembler unless the matching flag is given here.
parser.add_argument('--not_careful', action='store_true', help='Do not use the --careful option with SPAdes (used by default)')
parser.add_argument('--not_only_assembler', action='store_true', help='Do not use the --only-assembler option with SPAdes (used by default)')
parser.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
parser.add_argument('--verbose', action='store_true', help='Be verbose')
parser.add_argument('--spades_k', help='Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [%(default)s]', default='127,117,107,97,87,77', metavar='k1,k2,k3,...')
Expand All @@ -19,6 +21,8 @@ def run():
options.reads,
options.out_dir,
threads=options.threads,
careful=not options.not_careful,
only_assembler=not options.not_only_assembler,
spades_kmers=options.spades_k,
spades_use_first_success=options.spades_use_first,
verbose=options.verbose
Expand Down
4 changes: 4 additions & 0 deletions circlator/tasks/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ def run():
parser.add_argument('--min_length_merge', type=int, help='Minimum length of nucmer hit to use when merging [%(default)s]', metavar='INT', default=4000)
parser.add_argument('--breaklen', type=int, help='breaklen option used by nucmer [%(default)s]', metavar='INT', default=500)
parser.add_argument('--min_spades_circ_pc', type=float, help='Min percent of contigs needed to be covered by nucmer hits to spades circular contigs [%(default)s]', metavar='FLOAT', default=95)
# Both options are negative switches: SPAdes gets --careful and
# --only-assembler unless the matching flag is given here.
parser.add_argument('--assemble_not_careful', action='store_true', help='Do not use the --careful option with SPAdes (used by default)')
parser.add_argument('--assemble_not_only_assembler', action='store_true', help='Do not use the --only-assembler option with SPAdes (used by default)')
parser.add_argument('--spades_k', help='Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [%(default)s]', default='127,117,107,97,87,77', metavar='k1,k2,k3,...')
parser.add_argument('--spades_use_first', action='store_true', help='Use the first successful SPAdes assembly. Default is to try all kmers and use the assembly with the largest N50')
parser.add_argument('--ref_end', type=int, help='max distance allowed between nucmer hit and end of input assembly contig [%(default)s]', metavar='INT', default=15000)
Expand All @@ -35,6 +37,8 @@ def run():
nucmer_min_length_for_merges=options.min_length_merge,
nucmer_breaklen=options.breaklen,
min_spades_circular_percent=options.min_spades_circ_pc,
spades_careful=not options.assemble_not_careful,
spades_only_assembler=not options.assemble_not_only_assembler,
spades_kmers=options.spades_k,
spades_use_first_success=options.spades_use_first,
ref_end_tolerance=options.ref_end,
Expand Down
55 changes: 55 additions & 0 deletions circlator/tasks/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import argparse
import os
import shutil
import sys
import subprocess
import circlator

def run():
    '''Runs the complete Circlator pipeline on the test data in the package.

    Reads the output directory name and number of threads from the command
    line. Makes the output directory, changes into it, copies the test
    contigs and reads from the "data" directory of the circlator package,
    then runs the "all" task of the current script as a child process.

    Calls sys.exit(1) if the output directory cannot be made or entered, or
    if the child process has a non-zero return code.'''
    parser = argparse.ArgumentParser(
        description = 'Run Circlator on a small test dataset',
        usage = 'circlator test [options] <outdir>')
    parser.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
    parser.add_argument('outdir', help='Name of output directory')
    options = parser.parse_args()

    # The script gets re-run from inside the output directory. If it was
    # started with a relative path, that path must be resolved now, because
    # it will not be valid after the chdir below. A bare program name is
    # left as it is, so that it is still looked up in $PATH.
    script = sys.argv[0]
    if os.path.dirname(script):
        script = os.path.abspath(script)

    print('Running Circlator on test data...')

    try:
        os.mkdir(options.outdir)
        os.chdir(options.outdir)
    except OSError:
        # Only file system errors are expected here; anything else
        # (eg KeyboardInterrupt) is not caught.
        print('Error making output directory "', options.outdir, '". Cannot continue.', sep='', file=sys.stderr)
        sys.exit(1)

    print('Made output directory. Copying test data files into it:')

    modules_dir = os.path.dirname(os.path.abspath(circlator.__file__))
    test_data_dir = os.path.join(modules_dir, 'data')

    for filename in ['test_contigs.fa', 'test_reads.fq.gz']:
        shutil.copy(os.path.join(test_data_dir, filename), filename)
        print('    copied', filename)

    cmd = [
        script,
        'all',
        '--threads', str(options.threads),
        '--verbose',
        '--assemble_spades_use_first',
        'test_contigs.fa',
        'test_reads.fq.gz',
        'OUT',
    ]

    print('\nRunning Circlator with:', ' '.join(cmd), '', sep='\n')

    # Pass the arguments as a list, without a shell, so a script path that
    # has spaces or shell metacharacters in it still works.
    return_code = subprocess.call(cmd)

    if return_code != 0:
        print('\nSomething went wrong. See above for error message(s). Return code was', return_code)
        sys.exit(1)

    print('-' * 79)
    print('Finished run on test data OK')
23 changes: 23 additions & 0 deletions circlator/tests/assemble_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,26 @@ def test_build_spades_kmers(self):
with self.assertRaises(assemble.Error):
self.assembler._build_spades_kmers('41,spam')


def test_make_spades_command(self):
    '''test _make_spades_command'''
    reads = os.path.join(data_dir, 'assemble_test.dummy_reads.fa')
    prefix = self.assembler.spades.exe() + ' -s ' + reads
    both_flags = ' --careful --only-assembler'

    # Default settings: only the kmer and the output directory change.
    for kmer, outdir in [(41, 'out'), (43, 'out'), (41, 'out2')]:
        expected = prefix + ' -o ' + outdir + ' -t 1 -k ' + str(kmer) + both_flags
        self.assertEqual(expected, self.assembler._make_spades_command(kmer, outdir))

    # Switch off each optional flag in turn, then put it back to True.
    for attribute, flags_left in [('careful', ' --only-assembler'), ('only_assembler', ' --careful')]:
        setattr(self.assembler, attribute, False)
        expected = prefix + ' -o out -t 1 -k 41' + flags_left
        self.assertEqual(expected, self.assembler._make_spades_command(41, 'out'))
        setattr(self.assembler, attribute, True)

    # The thread count must end up in the -t option.
    self.assembler.threads = 2
    expected = prefix + ' -o out -t 2 -k 41' + both_flags
    self.assertEqual(expected, self.assembler._make_spades_command(41, 'out'))

2 changes: 2 additions & 0 deletions scripts/circlator
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ tasks = {
'fixstart': 'Change start position of circular sequences',
'minimus2': 'Run the minimus2 based circularisation pipeline',
'get_dnaa': 'Download file of dnaA (or other of user\'s choice) genes',
'test': 'Run Circlator on a small test set',
'version': 'Print version and exit',
}

Expand All @@ -29,6 +30,7 @@ ordered_tasks = [
'minimus2',
'get_dnaa',
'progcheck',
'test',
'version'
]

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='circlator',
version='1.1.5',
version='1.2.0',
description='circlator: a tool to circularise genome assemblies',
packages = find_packages(),
package_data={'circlator': ['data/*']},
Expand Down

0 comments on commit e3760e8

Please sign in to comment.