From 4ea7414085e69469a7d26a7ee9788d88cc310d5a Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 24 Jan 2017 13:52:11 +0000 Subject: [PATCH 1/5] Add --fastq option to bam2reads --- circlator/bamfilter.py | 28 ++++++---- circlator/tasks/bam2reads.py | 2 + circlator/tests/bamfilter_test.py | 52 ++++++++++++++---- ...filter_test_run_keep_unmapped.out.reads.fq | 32 +++++++++++ ...run.bam => bamfilter_test_run_no_qual.bam} | Bin ...bai => bamfilter_test_run_no_qual.bam.bai} | Bin .../data/bamfilter_test_run_with_qual.bam | Bin 0 -> 908 bytes .../data/bamfilter_test_run_with_qual.bam.bai | Bin 0 -> 264 bytes 8 files changed, 92 insertions(+), 22 deletions(-) create mode 100644 circlator/tests/data/bamfilter_test_run_keep_unmapped.out.reads.fq rename circlator/tests/data/{bamfilter_test_run.bam => bamfilter_test_run_no_qual.bam} (100%) rename circlator/tests/data/{bamfilter_test_run.bam.bai => bamfilter_test_run_no_qual.bam.bai} (100%) create mode 100644 circlator/tests/data/bamfilter_test_run_with_qual.bam create mode 100644 circlator/tests/data/bamfilter_test_run_with_qual.bam.bai diff --git a/circlator/bamfilter.py b/circlator/bamfilter.py index b480c46..c14f8fa 100644 --- a/circlator/bamfilter.py +++ b/circlator/bamfilter.py @@ -11,6 +11,7 @@ def __init__( self, bam, outprefix, + fastq_out=False, length_cutoff=100000, min_read_length=250, contigs_to_use=None, @@ -22,8 +23,9 @@ def __init__( if not os.path.exists(self.bam): raise Error('File not found:' + self.bam) + self.fastq_out = fastq_out self.length_cutoff = length_cutoff - self.reads_fa = os.path.abspath(outprefix + '.fasta') + self.reads_outfile = os.path.abspath(outprefix + '.reads') self.log = os.path.abspath(outprefix + '.log') self.log_prefix = log_prefix self.contigs_to_use = self._get_contigs_to_use(contigs_to_use) @@ -69,7 +71,7 @@ def _all_reads_from_contig(self, contig, fout): '''Gets all reads from contig called "contig" and writes to fout''' sam_reader = pysam.Samfile(self.bam, "rb") for read in 
sam_reader.fetch(contig): - print(mapping.aligned_read_to_read(read, ignore_quality=True), file=fout) + print(mapping.aligned_read_to_read(read, ignore_quality=not self.fastq_out), file=fout) def _get_all_unmapped_reads(self, fout): @@ -77,7 +79,7 @@ def _get_all_unmapped_reads(self, fout): sam_reader = pysam.Samfile(self.bam, "rb") for read in sam_reader.fetch(until_eof=True): if read.is_unmapped: - print(mapping.aligned_read_to_read(read, ignore_quality=True), file=fout) + print(mapping.aligned_read_to_read(read, ignore_quality=not self.fastq_out), file=fout) def _break_reads(self, contig, position, fout, min_read_length=250): @@ -88,15 +90,15 @@ def _break_reads(self, contig, position, fout, min_read_length=250): if read.pos < position < read.reference_end - 1: split_point = position - read.pos if split_point - 1 >= min_read_length: - sequence = mapping.aligned_read_to_read(read, revcomp=False, ignore_quality=True).subseq(0, split_point) + sequence = mapping.aligned_read_to_read(read, revcomp=False, ignore_quality=not self.fastq_out).subseq(0, split_point) sequence.id += '.left' seqs.append(sequence) if read.query_length - split_point >= min_read_length: - sequence = mapping.aligned_read_to_read(read, revcomp=False, ignore_quality=True).subseq(split_point, read.query_length) + sequence = mapping.aligned_read_to_read(read, revcomp=False, ignore_quality=not self.fastq_out).subseq(split_point, read.query_length) sequence.id += '.right' seqs.append(sequence) else: - seqs.append(mapping.aligned_read_to_read(read, revcomp=False, ignore_quality=True)) + seqs.append(mapping.aligned_read_to_read(read, revcomp=False, ignore_quality=not self.fastq_out)) for seq in seqs: if read.is_reverse: @@ -111,7 +113,7 @@ def _exclude_region(self, contig, start, end, fout): for read in sam_reader.fetch(contig): read_interval = pyfastaq.intervals.Interval(read.pos, read.reference_end - 1) if not read_interval.intersects(exclude_interval): - print(mapping.aligned_read_to_read(read, 
ignore_quality=True), file=fout) + print(mapping.aligned_read_to_read(read, ignore_quality=not self.fastq_out), file=fout) def _get_region(self, contig, start, end, fout, min_length=250): @@ -120,15 +122,17 @@ def _get_region(self, contig, start, end, fout, min_length=250): trimming_end = (start == 0) for read in sam_reader.fetch(contig, start, end): read_interval = pyfastaq.intervals.Interval(read.pos, read.reference_end - 1) - seq = mapping.aligned_read_to_read(read, ignore_quality=True, revcomp=False) + seq = mapping.aligned_read_to_read(read, ignore_quality=not self.fastq_out, revcomp=False) if trimming_end: bases_off_start = 0 bases_off_end = max(0, read.reference_end - 1 - end) - seq.seq = seq.seq[:read.query_alignment_end - bases_off_end] + #seq.seq = seq.seq[:read.query_alignment_end - bases_off_end] + seq = seq.subseq(0, read.query_alignment_end - bases_off_end) else: bases_off_start = max(0, start - read.pos + 1) - seq.seq = seq.seq[bases_off_start + read.query_alignment_start:] + #seq.seq = seq.seq[bases_off_start + read.query_alignment_start:] + seq = seq.subseq(bases_off_start + read.query_alignment_start, len(seq)) if read.is_reverse: seq.revcomp() @@ -141,7 +145,7 @@ def run(self): ref_lengths = self._get_ref_lengths() assert len(ref_lengths) > 0 f_log = pyfastaq.utils.open_file_write(self.log) - f_fa = pyfastaq.utils.open_file_write(self.reads_fa) + f_fa = pyfastaq.utils.open_file_write(self.reads_outfile) print(self.log_prefix, '#contig', 'length', 'reads_kept', sep='\t', file=f_log) if self.verbose: print('Getting reads from BAM file', self.bam, flush=True) @@ -187,4 +191,4 @@ def run(self): if self.verbose: print('Finished getting reads.') print('Log file:', self.log) - print('Reads file:', self.reads_fa, flush=True) + print('Reads file:', self.reads_outfile, flush=True) diff --git a/circlator/tasks/bam2reads.py b/circlator/tasks/bam2reads.py index 4fd2cc2..e86b107 100644 --- a/circlator/tasks/bam2reads.py +++ b/circlator/tasks/bam2reads.py @@ 
-8,6 +8,7 @@ def run(): description = 'Make reads from mapping to be reassembled', usage = 'circlator bam2reads [options] ') parser.add_argument('--discard_unmapped', action='store_true', help='Use this to not keep unmapped reads') + parser.add_argument('--fastq', action='store_true', help='Write fastq output (if quality scores are present in input BAM file)') parser.add_argument('--only_contigs', help='File of contig names (one per line). Only reads that map to these contigs are kept (and unmapped reads, unless --discard_unmapped is used).', metavar='FILENAME') parser.add_argument('--length_cutoff', type=int, help='All reads mapped to contigs shorter than this will be kept [%(default)s]', default=100000, metavar='INT') parser.add_argument('--min_read_length', type=int, help='Minimum length of read to output [%(default)s]', default=250, metavar='INT') @@ -19,6 +20,7 @@ def run(): bam_filter = circlator.bamfilter.BamFilter( options.bam, options.outprefix, + fastq_out=options.fastq, length_cutoff=options.length_cutoff, min_read_length=options.min_read_length, contigs_to_use=options.only_contigs, diff --git a/circlator/tests/bamfilter_test.py b/circlator/tests/bamfilter_test.py index b7336d2..bd7002f 100644 --- a/circlator/tests/bamfilter_test.py +++ b/circlator/tests/bamfilter_test.py @@ -119,11 +119,11 @@ def test_get_region_end(self): os.unlink(tmp) - def test_run_keep_unmapped(self): - '''test run keep unmapped''' + def test_run_keep_unmapped_no_quals(self): + '''test run keep unmapped bam has no quality scores''' outprefix = 'tmp.bamfilter_run' b = bamfilter.BamFilter( - os.path.join(data_dir, 'bamfilter_test_run.bam'), + os.path.join(data_dir, 'bamfilter_test_run_no_qual.bam'), outprefix, length_cutoff=600, min_read_length=100, @@ -131,16 +131,48 @@ def test_run_keep_unmapped(self): ) b.run() expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fa') - self.assertTrue(filecmp.cmp(expected, outprefix + '.fasta', shallow=False)) - 
os.unlink(outprefix + '.fasta') + self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) + os.unlink(outprefix + '.reads') + os.unlink(outprefix + '.log') + + b.fastq_out = True + b.run() + expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fa') + self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) + os.unlink(outprefix + '.reads') + os.unlink(outprefix + '.log') + + + def test_run_keep_unmapped_with_quals(self): + '''test run keep unmapped bam has quality scores''' + outprefix = 'tmp.bamfilter_run' + b = bamfilter.BamFilter( + os.path.join(data_dir, 'bamfilter_test_run_with_qual.bam'), + outprefix, + fastq_out=False, + length_cutoff=600, + min_read_length=100, + contigs_to_use={'contig1', 'contig3', 'contig4'} + ) + b.run() + expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fa') + self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) + os.unlink(outprefix + '.reads') + os.unlink(outprefix + '.log') + + b.fastq_out = True + b.run() + expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fq') + self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) + os.unlink(outprefix + '.reads') os.unlink(outprefix + '.log') - def test_run_discard_unmapped(self): - '''test run keep unmapped''' + def test_run_discard_unmapped_no_quals(self): + '''test run keep unmapped bam has no quality scores''' outprefix = 'tmp.bamfilter_run' b = bamfilter.BamFilter( - os.path.join(data_dir, 'bamfilter_test_run.bam'), + os.path.join(data_dir, 'bamfilter_test_run_no_qual.bam'), outprefix, length_cutoff=600, min_read_length=100, @@ -149,7 +181,7 @@ def test_run_discard_unmapped(self): ) b.run() expected = os.path.join(data_dir, 'bamfilter_test_run_discard_unmapped.out.reads.fa') - self.assertTrue(filecmp.cmp(expected, outprefix + '.fasta', shallow=False)) - os.unlink(outprefix + '.fasta') + self.assertTrue(filecmp.cmp(expected, 
outprefix + '.reads', shallow=False)) + os.unlink(outprefix + '.reads') os.unlink(outprefix + '.log') diff --git a/circlator/tests/data/bamfilter_test_run_keep_unmapped.out.reads.fq b/circlator/tests/data/bamfilter_test_run_keep_unmapped.out.reads.fq new file mode 100644 index 0000000..450ab15 --- /dev/null +++ b/circlator/tests/data/bamfilter_test_run_keep_unmapped.out.reads.fq @@ -0,0 +1,32 @@ +@contig1:1-100 +TTTAATTTGTCCGTAAATTGGGAGGTCTTCAACCGGGGGCGAATGTCGATCTCGTCGAGGCGTTTGTAAAGTGGTAACAGGGGTCATTGATCACGGTGTA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHGFDC +@contig1:301-400 +CGTTGATGATACGAATTACGTAGGGCTCTGGGAGATGCTCGGAACCCCACAGCGTCTATTTTAGTTGCGACATTACGCGGTATGCGCTTCTGCAAGATGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHGFDC +@contig4:1-100 +TCGAAAGTACACTTTGAACTCTAAAAGCGGTTACGACCTCTTCCGTTCGATCGATGCGTGAGTACGTACTCTGGATCCAGCCGTGGCAAACCGGGTAACA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHGFDC +@contig4:201-350 +CCTCCGCCTGCCTTTGACACACCGGACCTCGGGGGTGTCTAAAAGCCGTCCGTGTGTTGGATAGACATTTGTGACCGTATAGCGGGATGACGTTTCTCTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@contig4:651-801 +TGAAGGAGGGGGAGTCGTGCCCGTATCTGGGCCCAGTATATACATTGGGCAGGAGGGTTTGTCAAGAATTCTATCCTTACTAGTCTATTTTCGATACGCG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGFDBAA +@contig4:851-950 +GAGTACGAGGGACAGATGTCTACACTTGAGCGTACACAAGAATGTGGTACCAAAGGTATCCTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHGFDC +@contig4:901-1000 +CAAAGGTATCCTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGGTCTGTTGTACCCCTCTGCCCAACTGCTGAGTAGTTGGGTAAGGACCGAGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHGFDC +@unmapped_read 
+TTATGGTACTTCGTTGCTCCCAAGGCTGAACTGATACATAGAGTGGGCTTTGTGATAGAACCAAACGACAACGAAGCGAATTTCGTCACCATCTCCATAA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHGFDC diff --git a/circlator/tests/data/bamfilter_test_run.bam b/circlator/tests/data/bamfilter_test_run_no_qual.bam similarity index 100% rename from circlator/tests/data/bamfilter_test_run.bam rename to circlator/tests/data/bamfilter_test_run_no_qual.bam diff --git a/circlator/tests/data/bamfilter_test_run.bam.bai b/circlator/tests/data/bamfilter_test_run_no_qual.bam.bai similarity index 100% rename from circlator/tests/data/bamfilter_test_run.bam.bai rename to circlator/tests/data/bamfilter_test_run_no_qual.bam.bai diff --git a/circlator/tests/data/bamfilter_test_run_with_qual.bam b/circlator/tests/data/bamfilter_test_run_with_qual.bam new file mode 100644 index 0000000000000000000000000000000000000000..d8322f5d1ed6ab5f84ae6c31c25cf04dd41811e9 GIT binary patch literal 908 zcmV;719SWziwFb&00000{{{d;LjnM~0CRHmWxT?`z~JEF!WrggWvFM&8SHPBoS$Em zl9`uSlFH=}9LO2$2NcOG$xJuo^zpMYH84OGHNq-tj8)VGBx+~?5)N?Z^mMT*&P~iI z;SBJDGJtL{&@>7^xNmq|=1 z2Fh`<0R6=Q#89_0d|^c98=>)y(fB3|FPIq^0IA10pCADM03VA81ONa4009360763o z0JQ?mmc4FNF%X3vf{-W1K-@fUu z_vib)p4zyfR4uMjAngP#CDs8kgQMtDWWZRZXu@JFlCQu9rKHQPhY#-G3(uZzDtNKE zebn(hYB0Cf#xpJU`twU2RjeS+fGe?J=7#1nqYlB?$fZ_WGx%yU7Kw?)TZhx6I&7z& z89FtVYGK4F)Y%7+1X^bXqjPFF#hL0Q2J5SMogxHZL(0_|0-83{>|Zf0jW982GNy`7 zsZn7y5V!_nV2~)b*_RT!U<09Vtx4KXlj-)%3DZ|~+I{=yFfFw1TU_p`S2vXk9D{@^ z#+YUZBN7epB1e&m&YBiE2;^);2eC3s6-b43Y{pV5QCABbO|tHiaW7s?AAh$KX#c1) z{fC+q&nD3Fi9pBk{NHpqGjs-2Uj{>G>>yoQUuw25cg<`g*&0ppg@|*qmUAHTl2RK& zg3N)fEi92LGjs%K(*TIB6?vej~1)22FHa{Np`SEqBHrDaW7uKMfA=e1UeWiP+#r+ zTY)yNT97k!tk6)RgJCkB&rZ(!YLjTKO9qeu1P{W+C$X`3Z?y;+udylMIqQVJdK}+kT>aZjxNx|FPZW_=AG!W)0;;atZfHz>A^kigVU|?VZVoxCk21X#wz_1HUu407HUXvj5=;9zf$N*U{vUxCb>X=Z?Ll=kW iN0e;kW+wnfdlxeR literal 0 HcmV?d00001 From 26e6f100a327cf511ec560ef43329aed70d9e44e Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: 
Tue, 24 Jan 2017 14:18:07 +0000 Subject: [PATCH 2/5] Name output reads fasta or fastq to keep spades happy --- circlator/bamfilter.py | 2 +- circlator/tests/bamfilter_test.py | 38 +++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/circlator/bamfilter.py b/circlator/bamfilter.py index c14f8fa..68e5565 100644 --- a/circlator/bamfilter.py +++ b/circlator/bamfilter.py @@ -25,7 +25,7 @@ def __init__( self.fastq_out = fastq_out self.length_cutoff = length_cutoff - self.reads_outfile = os.path.abspath(outprefix + '.reads') + self.reads_outfile = os.path.abspath(outprefix + ('.fastq' if self.fastq_out else '.fasta')) self.log = os.path.abspath(outprefix + '.log') self.log_prefix = log_prefix self.contigs_to_use = self._get_contigs_to_use(contigs_to_use) diff --git a/circlator/tests/bamfilter_test.py b/circlator/tests/bamfilter_test.py index bd7002f..c88b880 100644 --- a/circlator/tests/bamfilter_test.py +++ b/circlator/tests/bamfilter_test.py @@ -131,15 +131,22 @@ def test_run_keep_unmapped_no_quals(self): ) b.run() expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fa') - self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) - os.unlink(outprefix + '.reads') + self.assertTrue(filecmp.cmp(expected, outprefix + '.fasta', shallow=False)) + os.unlink(outprefix + '.fasta') os.unlink(outprefix + '.log') - b.fastq_out = True + b = bamfilter.BamFilter( + os.path.join(data_dir, 'bamfilter_test_run_no_qual.bam'), + outprefix, + fastq_out=True, + length_cutoff=600, + min_read_length=100, + contigs_to_use={'contig1', 'contig3', 'contig4'} + ) b.run() expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fa') - self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) - os.unlink(outprefix + '.reads') + self.assertTrue(filecmp.cmp(expected, outprefix + '.fastq', shallow=False)) + os.unlink(outprefix + '.fastq') os.unlink(outprefix + '.log') @@ -156,15 
+163,22 @@ def test_run_keep_unmapped_with_quals(self): ) b.run() expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fa') - self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) - os.unlink(outprefix + '.reads') + self.assertTrue(filecmp.cmp(expected, outprefix + '.fasta', shallow=False)) + os.unlink(outprefix + '.fasta') os.unlink(outprefix + '.log') - b.fastq_out = True + b = bamfilter.BamFilter( + os.path.join(data_dir, 'bamfilter_test_run_with_qual.bam'), + outprefix, + fastq_out=True, + length_cutoff=600, + min_read_length=100, + contigs_to_use={'contig1', 'contig3', 'contig4'} + ) b.run() expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fq') - self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) - os.unlink(outprefix + '.reads') + self.assertTrue(filecmp.cmp(expected, outprefix + '.fastq', shallow=False)) + os.unlink(outprefix + '.fastq') os.unlink(outprefix + '.log') @@ -181,7 +195,7 @@ def test_run_discard_unmapped_no_quals(self): ) b.run() expected = os.path.join(data_dir, 'bamfilter_test_run_discard_unmapped.out.reads.fa') - self.assertTrue(filecmp.cmp(expected, outprefix + '.reads', shallow=False)) - os.unlink(outprefix + '.reads') + self.assertTrue(filecmp.cmp(expected, outprefix + '.fasta', shallow=False)) + os.unlink(outprefix + '.fasta') os.unlink(outprefix + '.log') From d9b5087883b2c5b925ae5fe764739a72d43e6e27 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 24 Jan 2017 15:09:25 +0000 Subject: [PATCH 3/5] Use fastq_out option --- circlator/merge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/circlator/merge.py b/circlator/merge.py index 9cd81e2..6dfea1f 100644 --- a/circlator/merge.py +++ b/circlator/merge.py @@ -694,12 +694,12 @@ def _iterative_bridged_contig_pair_merge(self, outprefix): ) reads_prefix = outprefix + '.iter.' 
+ str(iteration) + '.reads' - reads_to_map = reads_prefix + '.fasta' - bam_filter = circlator.bamfilter.BamFilter(bam, reads_prefix) + reads_to_map = reads_prefix + ('.fasta' if self.spades_only_assembler else '.fastq') + bam_filter = circlator.bamfilter.BamFilter(bam, reads_prefix, fastq_out=not self.spades_only_assembler) bam_filter.run() assembler_dir = outprefix + '.iter.' + str(iteration) + '.assembly' a = circlator.assemble.Assembler( - reads_prefix + '.fasta', + reads_prefix + ('.fasta' if self.spades_only_assembler else '.fastq'), assembler_dir, threads=self.threads, careful=self.spades_careful, From f1008e679f363752e49b33553e9bf73148ba995f Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 24 Jan 2017 15:10:21 +0000 Subject: [PATCH 4/5] Use fastq_out option --- circlator/tasks/all.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/circlator/tasks/all.py b/circlator/tasks/all.py index 2009bf7..6e81754 100644 --- a/circlator/tasks/all.py +++ b/circlator/tasks/all.py @@ -103,7 +103,7 @@ def run(): original_assembly_renamed = '00.input_assembly.fasta' bam = '01.mapreads.bam' filtered_reads_prefix = '02.bam2reads' - filtered_reads = filtered_reads_prefix + '.fasta' + filtered_reads = filtered_reads_prefix + ('.fastq' if options.assemble_not_only_assembler else '.fasta') assembly_dir = '03.assemble' reassembly = os.path.join(assembly_dir, 'contigs.fasta') merge_prefix = '04.merge' @@ -137,6 +137,7 @@ def run(): bam_filter = circlator.bamfilter.BamFilter( bam, filtered_reads_prefix, + fastq_out=options.assemble_not_only_assembler, length_cutoff=options.b2r_length_cutoff, min_read_length=options.b2r_min_read_length, contigs_to_use=options.b2r_only_contigs, From 7b506843e9feeedb6ce6747e21b1586bdda92f49 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 24 Jan 2017 15:47:03 +0000 Subject: [PATCH 5/5] Version bump; Fix usage for fasta/q and --assemble_not_only_assembler --- circlator/tasks/all.py | 6 +++--- setup.py | 2 +- 2 files changed, 4
insertions(+), 4 deletions(-) diff --git a/circlator/tasks/all.py b/circlator/tasks/all.py index 6e81754..75a3c1d 100644 --- a/circlator/tasks/all.py +++ b/circlator/tasks/all.py @@ -15,12 +15,12 @@ def print_message(m, opts): def run(): parser = argparse.ArgumentParser( description = 'Run mapreads, bam2reads, assemble, merge, clean, fixstart', - usage = 'circlator all [options] ') + usage = 'circlator all [options] ') parser.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT') parser.add_argument('--verbose', action='store_true', help='Be verbose') parser.add_argument('--unchanged_code', type=int, help='Code to return when the input assembly is not changed [%(default)s]', default=0, metavar='INT') parser.add_argument('assembly', help='Name of original assembly', metavar='assembly.fasta') - parser.add_argument('reads', help='Name of corrected reads FASTA file', metavar='reads.fasta') + parser.add_argument('reads', help='Name of corrected reads FASTA or FASTQ file', metavar='reads.fasta/q') parser.add_argument('outdir', help='Name of output directory (must not already exist)', metavar='output directory') mapreads_group = parser.add_argument_group('mapreads options') @@ -36,7 +36,7 @@ def run(): parser.add_argument('--assemble_spades_k', help='Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [%(default)s]', default='127,117,107,97,87,77', metavar='k1,k2,k3,...') parser.add_argument('--assemble_spades_use_first', action='store_true', help='Use the first successful SPAdes assembly. 
Default is to try all kmers and use the assembly with the largest N50') parser.add_argument('--assemble_not_careful', action='store_true', help='Do not use the --careful option with SPAdes (used by default)') - parser.add_argument('--assemble_not_only_assembler', action='store_true', help='Do not use the --assemble-only option with SPAdes (used by default)') + parser.add_argument('--assemble_not_only_assembler', action='store_true', help='Do not use the --assemble-only option with SPAdes (used by default). Important: with this option, the input reads must be in FASTQ format, otherwise SPAdes will crash because it needs quality scores to correct the reads.') merge_group = parser.add_argument_group('merge options') merge_group.add_argument('--merge_diagdiff', type=int, help='Nucmer diagdiff option [%(default)s]', metavar='INT', default=25) diff --git a/setup.py b/setup.py index be4bb45..d95e87c 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='circlator', - version='1.4.0', + version='1.4.1', description='circlator: a tool to circularise genome assemblies', packages = find_packages(), package_data={'circlator': ['data/*']},