diff --git a/.gitignore b/.gitignore index 5fcae23..ef36fa0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,8 +2,6 @@ /blib /docs /pm_to_blib -/MYMETA.yml -/MYMETA.json /MANIFEST.bak /install_tmp /setup.log diff --git a/Changes b/Changes index 6c12ea3..2e6cf74 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,8 @@ +1.0.0 + bam_stats.pl actually installed now. + Basic *.bas perl access module. + Upgraded libmaus/biobambam to resolve patch and CentOS install issue. + Reference implementations ensure unique RG:ID between files. 0.3.0 Changes for the re-worked PanCancer submission SOP. Patch for libmaus issue as not going to be a release in time. diff --git a/INSTALL b/INSTALL index bd4bd5c..03c46c9 100644 --- a/INSTALL +++ b/INSTALL @@ -14,14 +14,25 @@ OS: Other Software For installation to proceed you require the following packages: - zlib1g-dev - g++ - dh-autoreconf - libncurses-dev - pkg-config - libgd2-xpm-dev - - This listing is based on Ubuntu 12.04 + For Ubuntu (tested with 12.04) + apt-get install + zlib1g-dev + g++ + dh-autoreconf + libncurses-dev + pkg-config + libgd2-xpm-dev + + For CentOS (tested with 6.4) + yum install + zlib-devel + gcc-c++ + autoconf + automake + libtool + boost-devel.x86_64 + ncurses-devel.x86_64 + gd-devel setup.sh will install biobambam diff --git a/MANIFEST b/MANIFEST index ba33dcb..6757b7b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -10,6 +10,7 @@ docs.tar.gz INSTALL lib/PCAP.pm lib/PCAP/Bam.pm +lib/PCAP/Bam/Bas.pm lib/PCAP/Bam/Stats.pm lib/PCAP/Bwa.pm lib/PCAP/Bwa/Meta.pm @@ -20,7 +21,6 @@ LICENSE Makefile.PL MANIFEST This list of files MANIFEST.SKIP -patches/fileModeFix.diff prerelease.sh README.md setup.sh @@ -31,6 +31,7 @@ t/2_pl_compile.t t/3_external_progs.t t/pcap.t t/pcapBam.t +t/pcapBamBas.t t/pcapBamStats.t t/pcapBwa.t t/pcapBwaMeta.t @@ -44,6 +45,7 @@ testData/2_1.fq testData/3_2.fq testData/data.file testData/empty.bam +testData/empty.bam.bas testData/empty.file testData/empty.fq testData/empty_r1_1.fq @@ -64,4 +66,5 @@
testData/not_really_a.bam testData/paired.bam testData/Stats.bam testData/Stats.bam.bas +testData/test.bam.bas testData/unpaired.bam diff --git a/MYMETA.json b/MYMETA.json new file mode 100644 index 0000000..d70d5dc --- /dev/null +++ b/MYMETA.json @@ -0,0 +1,57 @@ +{ + "abstract" : "unknown", + "author" : [ + "unknown" + ], + "dynamic_config" : 0, + "generated_by" : "ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560", + "license" : [ + "unknown" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "PCAP", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "Bio::DB::Sam" : "1.39", + "Bio::Root::Version" : "1.006923", + "Capture::Tiny" : "0.24", + "Const::Fast" : "0.014", + "Data::UUID" : "1.219", + "Devel::Cover" : "1.09", + "File::Which" : "0.05", + "GD" : "2.52", + "IPC::System::Simple" : "1.25", + "List::Util" : "1.38", + "Math::Gradient" : "0.04", + "Module::Build" : "0.42", + "Pod::Coverage" : "0.23", + "Proc::ProcessTable" : "0.5", + "Term::UI" : "0.42", + "Test::Fatal" : "0.013", + "Try::Tiny" : "0.19" + } + } + }, + "release_status" : "stable", + "version" : "v1.0.0" +} diff --git a/MYMETA.yml b/MYMETA.yml new file mode 100644 index 0000000..d132837 --- /dev/null +++ b/MYMETA.yml @@ -0,0 +1,38 @@ +--- +abstract: unknown +author: + - unknown +build_requires: + ExtUtils::MakeMaker: 0 +configure_requires: + ExtUtils::MakeMaker: 0 +dynamic_config: 0 +generated_by: 'ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560' +license: unknown +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: 1.4 +name: PCAP +no_index: + directory: + - t + - inc +requires: + Bio::DB::Sam: 1.39 + Bio::Root::Version: 1.006923 + Capture::Tiny: 0.24 
+ Const::Fast: 0.014 + Data::UUID: 1.219 + Devel::Cover: 1.09 + File::Which: 0.05 + GD: 2.52 + IPC::System::Simple: 1.25 + List::Util: 1.38 + Math::Gradient: 0.04 + Module::Build: 0.42 + Pod::Coverage: 0.23 + Proc::ProcessTable: 0.5 + Term::UI: 0.42 + Test::Fatal: 0.013 + Try::Tiny: 0.19 +version: v1.0.0 diff --git a/Makefile.PL b/Makefile.PL index 48c032b..3a9cd73 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -31,6 +31,7 @@ WriteMakefile( EXE_FILES => [qw( bin/bam_to_sra_sub.pl bin/bwa_aln.pl bin/bwa_mem.pl + bin/bam_stats.pl bin/diff_bams.pl bin/monitor.pl)], PREREQ_PM => { diff --git a/bin/bwa_aln.pl b/bin/bwa_aln.pl index 28f3983..04b083f 100755 --- a/bin/bwa_aln.pl +++ b/bin/bwa_aln.pl @@ -43,9 +43,6 @@ BEGIN use PCAP::Bwa; use PCAP::Bwa::Meta; -my @mod_list = keys %INC; -exit 0 if(first {$_ =~ m|^Devel/Cover| } @mod_list); - const my @VALID_PROCESS => qw(bam2fq aln sampe mark); const my %INDEX_FACTOR => ( 'bam2fq' => 1, 'aln' => 2, diff --git a/bin/bwa_mem.pl b/bin/bwa_mem.pl index f10206c..e046487 100755 --- a/bin/bwa_mem.pl +++ b/bin/bwa_mem.pl @@ -43,9 +43,6 @@ BEGIN use PCAP::Bwa::Meta; use version; -my @mod_list = keys %INC; -exit 0 if(first {$_ =~ m|^Devel/Cover| } @mod_list); - const my @VALID_PROCESS => qw(bwamem mark); const my %INDEX_FACTOR => ( 'bwamem' => 1, 'mark' => 1,); diff --git a/docs.tar.gz b/docs.tar.gz index 71b6e18..c6ef546 100644 Binary files a/docs.tar.gz and b/docs.tar.gz differ diff --git a/lib/PCAP.pm b/lib/PCAP.pm index 124649a..9f4eab2 100644 --- a/lib/PCAP.pm +++ b/lib/PCAP.pm @@ -23,7 +23,7 @@ package PCAP; use strict; use Const::Fast qw(const); -our $VERSION = '0.3.0'; +our $VERSION = '1.0.0'; const my $LICENSE => "################# @@ -33,11 +33,13 @@ const my $LICENSE => #################"; const my $DEFAULT_PATH => 'biobambam,samtools,bwa'; -const my %UPGRADE_PATH => ( '0.1.0' => 'biobambam,samtools,bwa', - '0.1.1' => 'biobambam,bwa', - '0.1.2' => 'biobambam', - '0.2.0' => 'biobambam', - '0.3.0' => '', +const my 
%UPGRADE_PATH => ( '0.1.0' => 'biobambam,samtools,bwa', + '0.1.1' => 'biobambam,bwa', + '0.1.2' => 'biobambam', + '0.2.0' => 'biobambam', + '0.2.99' => 'biobambam', + '0.3.0' => 'biobambam', + '1.0.0' => '', ); sub license { diff --git a/lib/PCAP/Bam.pm b/lib/PCAP/Bam.pm index 09c4725..877198d 100644 --- a/lib/PCAP/Bam.pm +++ b/lib/PCAP/Bam.pm @@ -33,6 +33,7 @@ use File::Which qw(which); use Bio::DB::Sam; use Carp qw(croak); use List::Util qw(first); +use Data::UUID; use PCAP::Threaded; @@ -51,7 +52,7 @@ sub new { } sub rg_line_for_output { - my $bam = shift; + my ($bam, $uniq_id) = @_; my $sam = sam_ob($bam); my $header = $sam->header->text; my $rg_line; @@ -59,6 +60,10 @@ sub rg_line_for_output { my $new_rg = $1; die "BAM file appears to contain data for multiple readgroups, not supported: \n\n$header\n" if(defined $rg_line); $rg_line = $new_rg; + if($uniq_id) { + my $uuid = lc Data::UUID->new->create_str; + $rg_line =~ s/\tID:[^\t]+/\tID:$uuid/; + } $rg_line =~ s/\t/\\t/g; } return ($rg_line, $sam); # also return the SAM object @@ -359,6 +364,8 @@ The SAM object is also returned should it be useful for other calls Takes BAM or Bio::DB::Sam object as input and returns the string representation for the RG line. Intended for use when adding RG to BWA MEM output and is only useful in single RG BAMs +Optional second boolean arg causes ID to be replaced with a UUID. 
+ The SAM object is also returned should it be useful for other calls =item sam_ob diff --git a/lib/PCAP/Bam/Bas.pm b/lib/PCAP/Bam/Bas.pm new file mode 100644 index 0000000..c96b88f --- /dev/null +++ b/lib/PCAP/Bam/Bas.pm @@ -0,0 +1,119 @@ +package PCAP::Bam::Bas; + +##########LICENCE########## +# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project +# Copyright (C) 2014 ICGC PanCancer Project +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not see: +# http://www.gnu.org/licenses/gpl-2.0.html +##########LICENCE########## + +use PCAP; +our $VERSION = PCAP->VERSION; + +use strict; +use English qw( -no_match_vars ); +use warnings FATAL=>'all'; +use autodie qw( :all ); +use Carp qw(croak carp); + +sub new { + my ($class, $bas) = @_; + my $self = { }; + bless $self, $class; + $self->_init($bas); + return $self; +} + +sub _init { + my ($self, $bas) = @_; + croak "No bas file defined" if(!defined $bas); + die "*.bas file: $bas does not exist" unless(-e $bas); + die "*.bas file: $bas is empty" unless(-s $bas); + open my $IN, '<', $bas; + $self->bas_keys($IN); + $self->_import_data($IN); + close $IN; + return 1; +} + +sub _import_data { + my ($self, $fh) = @_; + while(my $line = <$fh>) { + chomp $line; + my @bits = split /\t/, $line; + my %rg; + for my $key(@{$self->bas_keys}) { + $rg{$key} = $bits[$self->{'key_pos_map'}->{$key}]; + } + $self->{'_data'}->{$rg{'readgroup'}} = \%rg; 
+ } + return 1; +} + +sub bas_keys { + my ($self, $key_fh) = @_; + croak "bas_keys should only be initialised once\n" if(exists $self->{'keys'} && defined $key_fh); + if(defined $key_fh) { + my $line = <$key_fh>; + chomp $line; + my @head = split /\t/, $line; + my %key_pos_map; + my $pos=0; + for my $key(@head) { + $key_pos_map{$key} = $pos++; + } + $self->{'keys'} = \@head; + $self->{'key_pos_map'} = \%key_pos_map; + } + return $self->{'keys'}; +} + +sub get { + my ($self, $rg, $key) = @_; + die qq{Readgroup '$rg' does not exist\n} unless(exists $self->{'_data'}->{$rg}); + return exists $self->{'_data'}->{$rg}->{$key} ? $self->{'_data'}->{$rg}->{$key} : undef; +} + +1; + +__END__ + +=head1 PCAP::Bam::Bas + +Convenience class for accessing data in a *.bas file. + +=head2 METHODS + +=over 2 + +=item new + +Construct an access object for a BAM statistics file. + + my $bas_ob = PCAP::Bam::Bas->new($bas); + +=item bas_keys + +Returns an array reference of the available keys for this BAS file. + +=item get + +Retrieve a value by its readgroup and key: + + $bas_ob->get($rg, 'median_insert_size'); + +NOTE: Returns undef if a key is not available. + +=back diff --git a/lib/PCAP/Bam/Stats.pm b/lib/PCAP/Bam/Stats.pm index 8878042..f958207 100644 --- a/lib/PCAP/Bam/Stats.pm +++ b/lib/PCAP/Bam/Stats.pm @@ -101,7 +101,18 @@ sub _parse_header { sub _process_reads { my ($groups, $sam, $qualiy_scoring) = @_; my $bam = $sam->bam; + my $processed_x = 1; + my $start = time; + my $processed_x_mill = 0; while (my $a = $bam->read1) { + if($processed_x++ == 1_000_000) { + $processed_x_mill++; + $processed_x = 1; + my $end = time; + my $elapsed = $end - $start; + $start = $end; + warn "$processed_x_mill mill. reads processed [${elapsed}s.
this block]\n"; + } my $flag = $a->flag; next if($flag & $NON_PRI); # skip secondary hits so no double counts next if($flag & $V_FAIL); # skip vendor fail as generally aren't considered diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index 32f7e38..61014b6 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -72,7 +72,7 @@ sub bwa_mem { $rg_line = q{'}.$input->rg_header(q{\t}).q{'}; } else { - ($rg_line, undef) = PCAP::Bam::rg_line_for_output($input->in); + ($rg_line, undef) = PCAP::Bam::rg_line_for_output($input->in, 1); } my $bwa = which('bwa') || die "Unable to find 'bwa' in path"; diff --git a/lib/PCAP/Bwa/Meta.pm b/lib/PCAP/Bwa/Meta.pm index fef51ff..f0f7f6a 100644 --- a/lib/PCAP/Bwa/Meta.pm +++ b/lib/PCAP/Bwa/Meta.pm @@ -32,6 +32,7 @@ use Carp qw( croak ); use Const::Fast qw(const); use List::Util qw(first); use File::Spec; +use Data::UUID; use PCAP::Bam; @@ -53,7 +54,7 @@ sub _init { my ($self, $opts) = @_; croak "'rg' is auto-populated, to initialise a start value see PCAP::Bwa::Meta::set_rg_index" if(exists $opts->{'rg'}); - for my $key(keys $opts) { + for my $key(keys %{$opts}) { croak "'$key' is not a valid parameter for object initialisation" unless(first {$key eq $_} @INIT_KEYS); croak "'$key' is not a scalar, only simple values are expected" if(ref $opts->{$key} ne q{}); $self->{$key} = $opts->{$key}; @@ -108,26 +109,32 @@ sub rg_header { } else { # use the BAM object to grab existing header + my $bam_elements = {}; unless(exists $self->{'fastq'}) { my $bam = PCAP::Bam->new($self->{'in'}); my $header_set = $bam->read_group_info->[0]; - $elements = $header_set; + $bam_elements = $header_set; } for my $required(@REQUIRED_RG_ELEMENTS) { - croak "'$required' is manditory for RG header" unless(exists $elements->{$required}); + croak "'$required' is manditory for RG header" unless(exists $elements->{$required} || exists $bam_elements->{$required}); } my @elements = ('@RG'); - if(exists $elements->{'ID'}) { - push @elements, 'ID:'.$elements->{'ID'}; - } - 
else { - push @elements, 'ID:'.$self->rg; - } - for my $key(sort keys %{$elements}) { + push @elements, 'ID:'.(lc Data::UUID->new->create_str); + + my %all_keys; + for my $key(sort keys %{$bam_elements}){ $all_keys{$key} = 1; } + for my $key(sort keys %{$elements}){ $all_keys{$key} = 1; } + + for my $key(sort keys %all_keys) { next if($key eq 'ID'); - push @elements, sprintf '%s:%s', $key, $elements->{$key}; + if(exists $elements->{$key}) { + push @elements, sprintf '%s:%s', $key, $elements->{$key}; + } + elsif(exists $bam_elements->{$key}) { + push @elements, sprintf '%s:%s', $key, $bam_elements->{$key}; + } } $self->{'rg_header'} = \@elements; } diff --git a/patches/fileModeFix.diff b/patches/fileModeFix.diff deleted file mode 100644 index 1c55c61..0000000 --- a/patches/fileModeFix.diff +++ /dev/null @@ -1,11 +0,0 @@ ---- src/libmaus/aio/PosixFdInput.hpp 2014-03-19 09:28:52.000000000 +0000 -+++ src/libmaus/aio/PosixFdInput.hpp.new 2014-03-31 14:54:28.756508916 +0100 -@@ -49,7 +49,7 @@ - #if defined(__APPLE__) - return O_RDONLY; - #else -- return O_RDONLY|O_NOATIME|O_LARGEFILE; -+ return O_RDONLY|O_LARGEFILE; - #endif - } - diff --git a/prerelease.sh b/prerelease.sh index bc6f3f6..f7c1c61 100755 --- a/prerelease.sh +++ b/prerelease.sh @@ -54,7 +54,7 @@ rm -f MANIFEST rm -rf install_tmp perl Makefile.PL > /dev/null make manifest &> /dev/null -rm -f Makefile MYMETA.json MYMETA.yml MANIFEST.bak pm_to_blib +rm -f Makefile MANIFEST.bak pm_to_blib # change back to original dir cd $INIT_DIR diff --git a/setup.sh b/setup.sh index b6d0453..4464475 100755 --- a/setup.sh +++ b/setup.sh @@ -3,9 +3,8 @@ SOURCE_BWA="https://github.com/lh3/bwa/archive/0.7.7.tar.gz" SOURCE_SNAPPY="https://snappy.googlecode.com/files/snappy-1.1.1.tar.gz" SOURCE_IOLIB="http://downloads.sourceforge.net/project/staden/io_lib/1.13.4/io_lib-1.13.4.tar.gz" -# remove patch from LIBMAUS section when next release available 
-SOURCE_LIBMAUS="https://github.com/gt1/libmaus/archive/0.0.108-release-20140319092837.tar.gz" -SOURCE_BIOBAMBAM="https://github.com/gt1/biobambam/archive/0.0.129-release-20140319092922.tar.gz" +SOURCE_LIBMAUS="https://github.com/gt1/libmaus/archive/0.0.112-release-20140411095503.tar.gz" +SOURCE_BIOBAMBAM="https://github.com/gt1/biobambam/archive/0.0.131-release-20140411101450.tar.gz" SOURCE_SAMTOOLS="https://github.com/samtools/samtools/archive/0.1.19.tar.gz" done_message () { @@ -161,7 +160,6 @@ if [[ ",$COMPILE," == *,biobambam,* ]] ; then ( get_distro "libmaus" $SOURCE_LIBMAUS cd $SETUP_DIR/libmaus - patch "src/libmaus/aio/PosixFdInput.hpp" < "$INIT_DIR/patches/fileModeFix.diff" autoreconf -i -f ./configure --prefix=$INST_PATH --with-snappy=$INST_PATH --with-io_lib=$INST_PATH make -j$CPU diff --git a/t/1_pm_compile.t b/t/1_pm_compile.t index a44c940..8f1feb4 100644 --- a/t/1_pm_compile.t +++ b/t/1_pm_compile.t @@ -17,7 +17,7 @@ my $lib_path = "$Bin/../lib"; # Add modules here that cannot be instantiated (should be extended and have no 'new') # or need a set of inputs - these should be tested in own test script -use constant MODULE_SKIP => qw(PCAP::Threaded PCAP::Bwa::Meta PCAP::Bam::Stats); +use constant MODULE_SKIP => qw(PCAP::Threaded PCAP::Bwa::Meta PCAP::Bam::Bas PCAP::Bam::Stats); my $init_cwd = getcwd; diff --git a/t/3_external_progs.t b/t/3_external_progs.t index 1f6742d..91290d3 100644 --- a/t/3_external_progs.t +++ b/t/3_external_progs.t @@ -17,15 +17,15 @@ my %EXPECTED_VERSION = ( 'bamcollate2' => { 'get' => q{ -h}, 'match' => qr/This is biobambam version ([[:digit:]\.]+)\./, - 'version' => ['0.0.129']}, + 'version' => ['0.0.131']}, 'bammarkduplicates' => { 'get' => q{ -h}, 'match' => qr/This is biobambam version ([[:digit:]\.]+)\./, - 'version' => ['0.0.129']}, + 'version' => ['0.0.131']}, 'bamsort' => { 'get' => q{ -h}, 'match' => qr/This is biobambam version ([[:digit:]\.]+)\./, - 'version' => ['0.0.129']}, + 'version' => ['0.0.131']}, 'bwa' => { 
'get' => q{}, 'match' => qr/Version: ([[:digit:]\.]+[[:alpha:]]?)/, # we don't care about the revision number diff --git a/t/pcap.t b/t/pcap.t index b752a03..174c1a7 100644 --- a/t/pcap.t +++ b/t/pcap.t @@ -1,12 +1,9 @@ use strict; use Test::More; -use Test::Fatal; use Const::Fast qw(const); -use File::Temp qw(tempdir); const my $MODULE => 'PCAP'; -my $obj; subtest 'Initialisation checks' => sub { use_ok($MODULE); }; @@ -15,7 +12,7 @@ subtest 'Initialisation checks' => sub { ok(PCAP::license(), 'License text retrieved'); is(PCAP::upgrade_path(), 'biobambam,samtools,bwa', 'Default program install when no previous version'); -is(PCAP::upgrade_path('9.9.9'), 'biobambam,samtools,bwa', 'Default program install when unkown version installed'); +is(PCAP::upgrade_path('9.9.9'), 'biobambam,samtools,bwa', 'Default program install when unknown version installed'); done_testing(); diff --git a/t/pcapBamBas.t b/t/pcapBamBas.t new file mode 100644 index 0000000..c93efd1 --- /dev/null +++ b/t/pcapBamBas.t @@ -0,0 +1,35 @@ +use strict; +use Test::More; +use Test::Fatal; +use File::Spec; +use Try::Tiny qw(try catch finally); +use Const::Fast qw(const); + +const my $MODULE => 'PCAP::Bam::Bas'; +const my $RG_1 => 1; +const my $EXP_MEDIAN => '462.000'; + +use FindBin qw($Bin); +my $test_data = "$Bin/../testData"; + +my $bas = File::Spec->catfile($test_data, 'test.bam.bas'); +my $empty_bas = File::Spec->catfile($test_data, 'empty.bam.bas'); + +subtest 'Initialisation checks' => sub { + use_ok($MODULE); + my $obj = new_ok($MODULE => [$bas]); + + like(exception { $MODULE->new; }, qr/No bas file defined/, 'Expected error, no file provided'); + like(exception { $MODULE->new('fake'); }, qr/\*\.bas file: .* does not exist/, q{Expected error, file doesn't exist}); + like(exception { $MODULE->new($empty_bas); }, qr/\*\.bas file: .* is empty/, q{Expected error, file empty}); + like(exception { $obj->bas_keys(1); }, qr/bas_keys should only be initialised once/, q{Expected error, value passed to 
pre-initialised function}); +}; + +subtest 'Access checks' => sub { + my $obj = new_ok($MODULE => [$bas]); + is($obj->get($RG_1, 'median_insert_size'), $EXP_MEDIAN, 'Get expected value with correct key'); + is($obj->get($RG_1, 'wibble'), undef, 'Get undef with unknown key'); + like(exception { $obj->get(99, 'wibble'); }, qr/Readgroup '.*' does not exist/, 'Expected error, unkown RG'); +}; + +done_testing(); diff --git a/t/pcapBwaMeta.t b/t/pcapBwaMeta.t index 23ff98f..2d58e71 100644 --- a/t/pcapBwaMeta.t +++ b/t/pcapBwaMeta.t @@ -12,9 +12,10 @@ const my $MODULE => 'PCAP::Bwa::Meta'; const my $REF_INIT => { 'in' => 'somefile', 'temp' => 'somepath',}; const my $SET_RG_VAL => 5; +const my $RG_DEFAULT => qr/\@RG\tID:[a-z0-9]{8}\-[a-z0-9]{4}\-[a-z0-9]{4}\-[a-z0-9]{4}\-[a-z0-9]{12}\tCN:SANGER\tDS:short\tLB:SAMPLE_LIBRARY\tPI:500\tPL:HiSeq\tPU:1_1\tSM:SAMPLE_NAME/; const my $RG_TAGS => {'SM' => 'wibble', 'LB' => 'wobble', }; -const my $RG_STRING => q{@RG\tID:1\tCN:SANGER\tDS:short\tLB:SAMPLE_LIBRARY\tPI:500\tPL:HiSeq\tPU:1_1\tSM:SAMPLE_NAME}; -const my $RG_PRINT => qq{\@RG\tID:1\tCN:SANGER\tDS:short\tLB:SAMPLE_LIBRARY\tPI:500\tPL:HiSeq\tPU:1_1\tSM:SAMPLE_NAME}; +const my $RG_STRING => qr/\@RG\\tID:[a-z0-9]{8}\-[a-z0-9]{4}\-[a-z0-9]{4}\-[a-z0-9]{4}\-[a-z0-9]{12}\\tCN:SANGER\\tDS:short\\tLB:wobble\\tPI:500\\tPL:HiSeq\\tPU:1_1\\tSM:wibble/; +const my $RG_PRINT => qr/\@RG\tID:[a-z0-9]{8}\-[a-z0-9]{4}\-[a-z0-9]{4}\-[a-z0-9]{4}\-[a-z0-9]{12}\tCN:SANGER\tDS:short\tLB:wobble\tPI:500\tPL:HiSeq\tPU:1_1\tSM:wibble/; const my @VALID_FASTQ_EXT => qw(fastq fq fastq.gz fq.gz); @@ -206,14 +207,20 @@ subtest 'Accessors' => sub { subtest 'rg_header checks' => sub { $meta = new_ok($MODULE => [{ 'in' => File::Spec->catfile($test_data, 'header.bam'), 'temp' => 'somepath',}]); - is($meta->rg_header(q{\t}, $RG_TAGS), $RG_STRING, 'RG header constructed correctly'); + + like($meta->rg_header(qq{\t}), $RG_DEFAULT, 'RG default header constructed correctly'); + + $meta = new_ok($MODULE => [{ 'in' => 
File::Spec->catfile($test_data, 'header.bam'), + 'temp' => 'somepath',}]); + + like($meta->rg_header(q{\t}, $RG_TAGS), $RG_STRING, 'RG header constructed correctly'); like( exception { $meta->rg_header(q{\t}, $RG_TAGS) } , qr/'rg_header' has already been set/ , 'Fail to set rg_header a second time'); - is($meta->rg_header(q{\t}), $RG_STRING, 'RG header retrieved for arg pass'); - is($meta->rg_header(qq{\t}), $RG_PRINT, 'RG header retrieved for print'); + like($meta->rg_header(q{\t}), $RG_STRING, 'RG header retrieved for arg pass'); + like($meta->rg_header(qq{\t}), $RG_PRINT, 'RG header retrieved for print'); # clear header for further tests my $tmp = tempdir( CLEANUP => 1 ); diff --git a/testData/empty.bam.bas b/testData/empty.bam.bas new file mode 100644 index 0000000..e69de29 diff --git a/testData/test.bam.bas b/testData/test.bam.bas new file mode 100644 index 0000000..30e2435 --- /dev/null +++ b/testData/test.bam.bas @@ -0,0 +1,7 @@ +bam_filename sample platform platform_unit library readgroup read_length_r1 read_length_r2 #_mapped_bases #_mapped_bases_r1 #_mapped_bases_r2 #_divergent_bases #_divergent_bases_r1 #_divergent_bases_r2 #_total_reads #_total_reads_r1 #_total_reads_r2 #_mapped_reads #_mapped_reads_r1 #_mapped_reads_r2 #_mapped_reads_properly_paired #_gc_bases_r1 #_gc_bases_r2 mean_insert_size insert_size_sd median_insert_size #_duplicate_reads +PD13371a.bam PD13371a . . . 1 100 100 24066749115 12096320393 11970428722 86586034 39684931 46901103 241073020 120536510 120536510 240782372 120535428 120246944 115393875 4781114038 4789739424 460.470 39.905 462.000 14493033 +PD13371a.bam PD13371a . . . 2 100 100 33444518524 16803630544 16640887980 138613045 59491641 79121404 335098334 167549167 167549167 334976873 167548017 167428856 160427629 6777595914 6788823326 458.869 41.973 460.000 12588896 +PD13371a.bam PD13371a . . . 
3 100 100 33335660832 16748798875 16586861957 142524514 60368180 82156334 334001680 167000840 167000840 333878745 166999808 166878937 160072250 6757465112 6769445772 458.952 41.462 460.000 12661963 +PD13371a.bam PD13371a . . . 4 100 100 33878129114 17020880701 16857248413 143374364 61893442 81480922 339452788 169726394 169726394 339325363 169725235 169600128 162274453 6867382398 6879486121 458.826 40.892 460.000 13363687 +PD13371a.bam PD13371a . . . 5 100 100 34174487947 17171931512 17002556435 142252787 60659258 81593529 342440716 171220358 171220358 342314027 171219098 171094929 163681639 6925259715 6936844924 458.865 40.729 460.000 12878319 +PD13371a.bam PD13371a . . . 6 100 100 38123815250 19163295158 18960520092 172053921 72582440 99471481 382152008 191076004 191076004 381924401 191074392 190850009 182263109 7591886249 7616998227 458.054 41.327 459.000 11399981