Skip to content

Commit

Permalink
Merge pull request #50 from martinghunt/mapping_tests
Browse files Browse the repository at this point in the history
Mapping tests
  • Loading branch information
John Tate committed Mar 9, 2016
2 parents b29ec01 + 5222a9b commit 0d6606d
Show file tree
Hide file tree
Showing 14 changed files with 174 additions and 18 deletions.
2 changes: 1 addition & 1 deletion circlator/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class Error (Exception): pass

version = '1.1.4'
version = '1.1.5'

def syscall(cmd, allow_fail=False, verbose=False):
if verbose:
Expand Down
16 changes: 10 additions & 6 deletions circlator/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ class Error (Exception): pass
]


def bwa_index(infile, outprefix=None, bwa='bwa', verbose=False):
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
if bwa is None:
bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

if outprefix is None:
outprefix = infile

Expand All @@ -24,7 +27,7 @@ def bwa_index(infile, outprefix=None, bwa='bwa', verbose=False):
return

cmd = ' '.join([
bwa, 'index',
bwa.exe(), 'index',
'-p', outprefix,
infile
])
Expand Down Expand Up @@ -53,7 +56,7 @@ def bwa_mem(
bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
unsorted_bam = outfile + '.tmp.unsorted.bam'
tmp_index = outfile + '.tmp.bwa_index'
bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa.exe())
bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

cmd = ' '.join([
bwa.exe(), 'mem',
Expand All @@ -76,12 +79,13 @@ def bwa_mem(
thread_mem = int(500 / threads)

# Here we have to check the version of samtools: starting from 1.3, the
# -o flag is used for specifying the samtools sort output-file.
# Starting from 1.2 you can use the -o flag, but can't have
# -o out.bam at the end of the call, so use new style from 1.3 onwards.

outparam = ''

if samtools.version_at_least('1.2'):
if samtools.version_at_least('1.3'):
outparam = '-o'
samout = outfile
else:
Expand Down
3 changes: 2 additions & 1 deletion circlator/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,8 @@ def _merge_all_bridged_contigs(self, nucmer_hits, ref_contigs, qry_contigs, log_
def _index_fasta(self, infile):
    '''Make a samtools faidx index of infile.

    Does nothing if the file infile + '.fai' already exists. Otherwise the
    samtools executable is obtained through
    circlator.external_progs.make_and_check_prog, instead of relying on a
    hard-coded "samtools" command name, and faidx is run exactly once.
    '''
    fai = infile + '.fai'
    if os.path.exists(fai):
        return

    samtools = circlator.external_progs.make_and_check_prog('samtools')
    circlator.common.syscall(samtools.exe() + ' faidx ' + infile, verbose=self.verbose)


def _write_act_files(self, ref_fasta, qry_fasta, coords_file, outprefix):
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@read1
TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGHFEDCBA
@read2
GATCGTCACGAAAGAACCAAGCCGGATCGTGGGAGGGGTACAACTCAGGTGAATTAACGT
+
HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGFEDC
10 changes: 10 additions & 0 deletions circlator/tests/data/mapping_test_aligned_read_to_read.ref.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>1
GTCGTGACGCCTTATGGCCCAGCTAGTCGCGTGCTGCGAACTGAGTGAATATGCCTGGCG
TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC
AACGAAGTGAAATTACGTTAGAATGCCAAAGCTTCAGTTCCATGATTCATGGTGGAGGGT
GTGGCTCTCGCGAATTCCTATCAGTCGACGCATTCCGATCGCGGCTAAGTTGGGTTACCA
ACGTTAATTCACCTGAGTTGTACCCCTCCCACGATCCGGCTTGGTTCTTTCGTGACGATC
GTCTGAACTTACAGTCTTTCAGTCACGACTGACCTGGCGTCATACATTTGTACGACGAGT
TGGTCTTTAGGGCCTGTCAGTGCGGGACCCTTACAATATATGCTCGAACTCCACACAAAC
CCATCAACGTTGGCCCAGGTCAGTCGCTCTTGTATGTTGAGTACAAAGGCCGCAGCTTAG
CCCCGCCCGTGAGGAAAGTC
20 changes: 20 additions & 0 deletions circlator/tests/data/mapping_test_bwa_index.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
>1
TTAGTAGTAATGTGATACCGCTCCAAGCGGGCGGTTTATAGTTAGGGTGGCGCATGAATT
ATCACTGATCCTGAAGGAGGGGCAACGTTCGGAGGCAGGTAGCTCTCATTGCCGATTGCT
CTACAGGGGCGGATTCGGAAAGGTCATCAGCGCAGAGCCGTCTGCTATAATTAATAACGA
AACGTCTGAGACTAGTACAGCTCACGGAACTCCTCTGCCTCAGGAGTCACAAGACACGTG
AAAATTCTCTGCGGCGTCAGGGTTAGTCATTAAAACACTTATCGATGCGAGGGTCCCTGG
TATAGGGGCGAGGAATTAAAAGAGCCACCACGACTGGGTCCAGGGGGGAACGGCTGGGGA
AGACCACAGTAGGTGGGGGTAGGGACGACTTGCCTTCTGATCAGGAAACGGGCCCCAGAG
CATCACTTAGGGTTTTCAGGTGATGTGGCTTCTGCTCACTCGTAGCTTTGCATCCGGTGG
GGAACGATGCGGAGGGCGAG
>2
TTCCTTAGCAGGAGTACTACACTGAAATGTCGTAGACGTATAAACTAATACAAAAAATGG
CGCTGTCGTGCGGAAAGAATGGCAGCCAAACACGTCATCAGGGTTGGTGGTGTCGTTATG
CGACCCGCCGATCTCCGTGCCAGTTACTCAAATCCCGAGGAACGGATACGAACGTCTACC
TAACTTTCGCCCATCTCTAATCTGAAGCTGACTCACAAATAAAGGGCGTGTAAAATACGT
CGTTTGAATGAGTCAAATGGCGCTTACATGATATTACACCGCGTTAAGCTGCCTTGTCCA
TTTTGTATTGTAATCGCTGTGGGACACCCCTTCGCGCTCACGTGGGAGGGATGGATATGA
CGAGACCTCGGCCATTGATGCCCGAAAATTCCCATGCACCGGGAAGTGTAGGTAGATTAA
AATCAGTAGACTTAGAGCTCGGACCGCACGGAGTCTACATCATAGTGTCTAGATGATATT
GGCGCCACGAGAGAGGAGCG
24 changes: 24 additions & 0 deletions circlator/tests/data/mapping_test_bwa_mem.reads.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
@1:1:113:235
ATAATCATCCTCTTTTCGGTGCAACCGCGACAGTCTGGAGGGCGCGACCGGGACTTATCGACTAGGCTGACGGCGG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@1:2:49:172
AAACGTTTCGTCTCGAGACAGATGCATTCGGTTAGACCCGACACGTGGGTTACATGTAGCGAGAATAATCATCCTC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@1:3:205:315
CGTCATACCATTCGGTTCTATGAAACTCGTGTGTGGCACGCATAGGAGAATGACGCGTTGTATCGCGGAAGAGAAT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@2:4:101:224
AGCGTACCATTAAGTACCATCCGTAAGCAGTTACTGCAACAAGATCGCCTACCTGAGCAGCCCGGTGAACTAGGAT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@2:5:27:142
TCAGCAGGCAGAAAATACGAAGATCCAACGAGGGGCATAGATAACACTCTAGACGGCTGTACCACAATTTTCCTAG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@2:6:214:330
GGCGCCTAAAGCGCTCTCCACAGTGTCGGGCGAGATGTTCGAGTCAAGACGAGTCATCCGCGCTAGGCCCTGAATA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
20 changes: 20 additions & 0 deletions circlator/tests/data/mapping_test_bwa_mem.ref.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
>1
TATTGTAGTTTTATGCGTTCTGCACAACCACGACTATAATAGGATTAAAAACGTTTCGTC
TCGAGACAGATGCATTCGGTTAGACCCGACACGTGGGTTACATGTAGCGAGAATAATCAT
CCTCTTTTCGGTGCAACCGCGACAGTCTGGAGGGCGCGACCGGGACTTATCGACTAGGCT
GACGGCGGGTACTTAGCCGATGATCGTCATACCATTCGGTTCTATGAAACTCGTGTGTGG
CACGCATAGGAGAATGACGCGTTGTATCGCGGAAGAGAATCGGGCTAGGGGATTTGAACG
CAACGTACTGCAGATTAGAGTATACCCATGGCTCCTAGAGATACCGGAACTTAGTCATGG
ACGCTCGTCAAATAGTGTGGGGGGCTGCCCCAGATGCAATTCAAATACAGTCCCGCCGGA
CTCAGTGATATGTCCGCCCTCAAGTTCAGTGTACCCGGCGCCGACGTCATAATACTCTGC
TTGGATTTTCGTAGAAGCCA
>2
ACCCTTGGCCACGTGGCAGCGACCTTTCAGCAGGCAGAAAATACGAAGATCCAACGAGGG
GCATAGATAACACTCTAGACGGCTGTACCACAATTTTCCTAGCGTACCATTAAGTACCAT
CCGTAAGCAGTTACTGCAACAAGATCGCCTACCTGAGCAGCCCGGTGAACTAGGATTTAA
GTGACCCGGGTCGTGAGATCTCTCCGGATTCTGGGCGCCTAAAGCGCTCTCCACAGTGTC
GGGCGAGATGTTCGAGTCAAGACGAGTCATCCGCGCTAGGCCCTGAATAACAAACAGGTG
AAAGGGTATCTCTACAAAATGGCATGCTCACCACGGCAAAACACCTGGAGAGTAAACTTT
AGTATCGAGTGGAAACACGGTATTGCGGGGCTGATATAGACCAGCACTATGACGAGATGA
ACTAAGGCCGACGGCCCCTCGAGTTTTAAGCGGCGAGACGCAATGTGTCTACTCTCAGTA
CTCGGTAACTCCTATGTCAC
2 changes: 2 additions & 0 deletions circlator/tests/data/mapping_test_bwa_mem.ref.fa.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1 500 3 60 61
2 500 515 60 61
77 changes: 72 additions & 5 deletions circlator/tests/mapping_test.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,88 @@
import unittest
import copy
import filecmp
import os
import pyfastaq
import pysam
import pyfastaq
from circlator import mapping

modules_dir = os.path.dirname(os.path.abspath(mapping.__file__))
data_dir = os.path.join(modules_dir, 'tests', 'data')


class TestMapping(unittest.TestCase):
    '''Tests for the functions in circlator.mapping.

    test_bwa_index and test_bwa_mem call mapping functions that run the
    external bwa program (and, presumably, samtools for sorting/indexing the
    BAM - confirm against mapping.bwa_mem), so those tools must be available.
    Temporary files are written to the current working directory and are
    removed again even when a test fails.
    '''

    @staticmethod
    def _remove_files(filenames):
        '''Delete every file in filenames that currently exists.'''
        for filename in filenames:
            if os.path.exists(filename):
                os.unlink(filename)


    def test_bwa_index(self):
        '''test bwa_index'''
        ref = os.path.join(data_dir, 'mapping_test_bwa_index.fa')
        outprefix = 'tmp.mapping_test.bwa_index'
        expected_files = [outprefix + '.' + x for x in ['amb', 'ann', 'bwt', 'pac', 'sa']]

        try:
            mapping.bwa_index(ref, outprefix)

            for filename in expected_files:
                self.assertTrue(os.path.exists(filename))
        finally:
            # Clean up all index files, not just those before a failed assert
            self._remove_files(expected_files)


    def test_bwa_index_clean(self):
        '''test bwa_index_clean'''
        files_prefix = 'tmp.test_bwa_index_clean'
        test_files = [files_prefix + '.' + x for x in ['amb', 'ann', 'bwt', 'pac', 'sa']]

        try:
            for filename in test_files:
                # Empty placeholder standing in for a real bwa index file
                with open(filename, 'w'):
                    pass

                self.assertTrue(os.path.exists(filename))

            mapping.bwa_index_clean(files_prefix)

            for filename in test_files:
                self.assertFalse(os.path.exists(filename))
        finally:
            # Only has anything to do if bwa_index_clean failed to remove a file
            self._remove_files(test_files)


    def test_bwa_mem(self):
        '''test bwa_mem'''
        ref = os.path.join(data_dir, 'mapping_test_bwa_mem.ref.fa')
        reads = os.path.join(data_dir, 'mapping_test_bwa_mem.reads.fq')
        outfile = 'tmp.mapping_test_bwa_mem.bam'

        # Read names in the order they are expected to come back from fetch()
        expected_reads = [
            '1:2:49:172',
            '1:1:113:235',
            '1:3:205:315',
            '2:5:27:142',
            '2:4:101:224',
            '2:6:214:330',
        ]

        try:
            mapping.bwa_mem(ref, reads, outfile)
            self.assertTrue(os.path.exists(outfile))
            self.assertTrue(os.path.exists(outfile + '.bai'))

            sam_reader = pysam.Samfile(outfile, "rb")
            got_reads = []

            try:
                for read in sam_reader.fetch():
                    got_reads.append(read.qname)
                    self.assertFalse(read.is_unmapped)
            finally:
                sam_reader.close()

            self.assertEqual(expected_reads, got_reads)
        finally:
            self._remove_files([outfile, outfile + '.bai'])


    def test_aligned_read_to_read(self):
        '''test aligned_read_to_read'''
        infile = os.path.join(data_dir, 'mapping_test_aligned_read_to_read.bam')
        sam_reader = pysam.Samfile(infile, "rb")

        # Keep the file open until every assertion has used the alignments
        try:
            aln1, aln2 = list(sam_reader.fetch())
            read1_fq = pyfastaq.sequences.Fastq('read1', 'TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC', 'IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGHFEDCBA')
            read1_fa = pyfastaq.sequences.Fasta('read1', 'TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC')
            self.assertEqual(read1_fq, mapping.aligned_read_to_read(aln1))
            self.assertEqual(read1_fq, mapping.aligned_read_to_read(aln1, revcomp=False))
            self.assertEqual(read1_fa, mapping.aligned_read_to_read(aln1, ignore_quality=True))

            read2 = pyfastaq.sequences.Fastq('read2', 'GATCGTCACGAAAGAACCAAGCCGGATCGTGGGAGGGGTACAACTCAGGTGAATTAACGT', 'HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGFEDC')
            # revcomp() modifies the record in place, so work on a copy
            read2_rev = copy.copy(read2)
            read2_rev.revcomp()
            self.assertEqual(read2, mapping.aligned_read_to_read(aln2))
            self.assertEqual(read2_rev, mapping.aligned_read_to_read(aln2, revcomp=False))
        finally:
            sam_reader.close()

6 changes: 3 additions & 3 deletions install_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ start_dir=$(pwd)

BWA_VERSION=0.7.12
PRODIGAL_VERSION=2.6.2
SAMTOOLS_VERSION=1.2
SAMTOOLS_VERSION=1.3
MUMMER_VERSION=3.23
SPADES_VERSION=3.5.0
SPADES_VERSION=3.6.0

BWA_DOWNLOAD_URL="http://downloads.sourceforge.net/project/bio-bwa/bwa-${BWA_VERSION}.tar.bz2"
PRODIGAL_DOWNLOAD_URL="https://github.com/hyattpd/Prodigal/releases/download/v${PRODIGAL_VERSION}/prodigal.linux"
SAMTOOLS_DOWNLOAD_URL="https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2"
MUMMER_DOWNLOAD_URL="http://downloads.sourceforge.net/project/mummer/mummer/${MUMMER_VERSION}/MUMmer${MUMMER_VERSION}.tar.gz"
SPADES_DOWNLOAD_URL="http://spades.bioinf.spbau.ru/release3.5.0/SPAdes-${SPADES_VERSION}-Linux.tar.gz"
SPADES_DOWNLOAD_URL="http://spades.bioinf.spbau.ru/release${SPADES_VERSION}/SPAdes-${SPADES_VERSION}-Linux.tar.gz"


# Make an install location
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='circlator',
version='1.1.4',
version='1.1.5',
description='circlator: a tool to circularise genome assemblies',
packages = find_packages(),
package_data={'circlator': ['data/*']},
Expand All @@ -20,7 +20,7 @@
install_requires=[
'openpyxl',
'pyfastaq >= 3.10.0',
'pysam >= 0.8.1',
'pysam >= 0.8.1, <= 0.8.3',
'pymummer>=0.6.1',
'bio_assembly_refinement>=0.5.0',
],
Expand Down

0 comments on commit 0d6606d

Please sign in to comment.