Skip to content

Commit

Permalink
Fixed memory leak in isBimera
Browse files Browse the repository at this point in the history
  • Loading branch information
benjjneb committed May 22, 2016
1 parent 26daf96 commit 9e7f9c9
Show file tree
Hide file tree
Showing 6 changed files with 203 additions and 199 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ Description: The dada2 package provides "OTU picking" functionality, but instead
sequences and associated abundances after removing substitution and chimeric
errors. Taxonomic classification is also available via a native implementation
of the RDP classifier method.
Version: 1.0.0
Date: 2016-02-18
Version: 1.0.3
Date: 2016-05-22
Maintainer: Benjamin Callahan <benjamin.j.callahan@gmail.com>
Author: Benjamin Callahan <benjamin.j.callahan@gmail.com>, Paul McMurdie, Susan
Holmes
Expand Down
20 changes: 10 additions & 10 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @useDynLib dada2
#' @importFrom Rcpp evalCpp
NULL

dada_uniques <- function(seqs, abundances, err, quals, score, gap, use_kmers, kdist_cutoff, band_size, omegaA, max_clust, min_fold, min_hamming, use_quals, qmax, final_consensus, vectorized_alignment, homo_gap, verbose) {
.Call('dada2_dada_uniques', PACKAGE = 'dada2', seqs, abundances, err, quals, score, gap, use_kmers, kdist_cutoff, band_size, omegaA, max_clust, min_fold, min_hamming, use_quals, qmax, final_consensus, vectorized_alignment, homo_gap, verbose)
C_is_bimera <- function(sq, pars, allow_one_off, min_one_off_par_dist, score, gap_p, max_shift) {
.Call('dada2_C_is_bimera', PACKAGE = 'dada2', sq, pars, allow_one_off, min_one_off_par_dist, score, gap_p, max_shift)
}

C_nwalign <- function(s1, s2, score, gap_p, homo_gap_p, band, endsfree) {
Expand All @@ -17,10 +13,6 @@ C_eval_pair <- function(s1, s2) {
.Call('dada2_C_eval_pair', PACKAGE = 'dada2', s1, s2)
}

C_is_bimera <- function(sq, pars, allow_one_off, min_one_off_par_dist, score, gap_p, max_shift) {
.Call('dada2_C_is_bimera', PACKAGE = 'dada2', sq, pars, allow_one_off, min_one_off_par_dist, score, gap_p, max_shift)
}

C_pair_consensus <- function(s1, s2, prefer) {
.Call('dada2_C_pair_consensus', PACKAGE = 'dada2', s1, s2, prefer)
}
Expand Down Expand Up @@ -72,6 +64,14 @@ C_nwvec <- function(s1, s2, match, mismatch, gap_p, band) {
.Call('dada2_C_nwvec', PACKAGE = 'dada2', s1, s2, match, mismatch, gap_p, band)
}

#' @useDynLib dada2
#' @importFrom Rcpp evalCpp
NULL

dada_uniques <- function(seqs, abundances, err, quals, score, gap, use_kmers, kdist_cutoff, band_size, omegaA, max_clust, min_fold, min_hamming, use_quals, qmax, final_consensus, vectorized_alignment, homo_gap, verbose) {
.Call('dada2_dada_uniques', PACKAGE = 'dada2', seqs, abundances, err, quals, score, gap, use_kmers, kdist_cutoff, band_size, omegaA, max_clust, min_fold, min_hamming, use_quals, qmax, final_consensus, vectorized_alignment, homo_gap, verbose)
}

C_assign_taxonomy <- function(seqs, refs, ref_to_genus, genusmat, verbose) {
.Call('dada2_C_assign_taxonomy', PACKAGE = 'dada2', seqs, refs, ref_to_genus, genusmat, verbose)
}
Expand Down
2 changes: 1 addition & 1 deletion R/multiSample.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ makeSequenceTable <- function(samples, orderBy = "abundance") {
unqs <- lapply(samples, getUniques)
unqsqs <- unique(do.call(c, lapply(unqs, names)))
if(length(unique(nchar(unqsqs)))>1) { message("The sequences being tabled vary in length.") }
rval <- matrix(0, nrow=length(unqs), ncol=length(unqsqs))
rval <- matrix(0L, nrow=length(unqs), ncol=length(unqsqs))
# Samples are rows, columns are sequences
colnames(rval) <- unqsqs
for(i in seq_along(unqs)) {
Expand Down
78 changes: 39 additions & 39 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,20 @@

using namespace Rcpp;

// dada_uniques
Rcpp::List dada_uniques(std::vector< std::string > seqs, std::vector<int> abundances, Rcpp::NumericMatrix err, Rcpp::NumericMatrix quals, Rcpp::NumericMatrix score, int gap, bool use_kmers, double kdist_cutoff, int band_size, double omegaA, int max_clust, double min_fold, int min_hamming, bool use_quals, int qmax, bool final_consensus, bool vectorized_alignment, int homo_gap, bool verbose);
RcppExport SEXP dada2_dada_uniques(SEXP seqsSEXP, SEXP abundancesSEXP, SEXP errSEXP, SEXP qualsSEXP, SEXP scoreSEXP, SEXP gapSEXP, SEXP use_kmersSEXP, SEXP kdist_cutoffSEXP, SEXP band_sizeSEXP, SEXP omegaASEXP, SEXP max_clustSEXP, SEXP min_foldSEXP, SEXP min_hammingSEXP, SEXP use_qualsSEXP, SEXP qmaxSEXP, SEXP final_consensusSEXP, SEXP vectorized_alignmentSEXP, SEXP homo_gapSEXP, SEXP verboseSEXP) {
// C_is_bimera
bool C_is_bimera(std::string sq, std::vector<std::string> pars, bool allow_one_off, int min_one_off_par_dist, Rcpp::NumericMatrix score, int gap_p, int max_shift);
RcppExport SEXP dada2_C_is_bimera(SEXP sqSEXP, SEXP parsSEXP, SEXP allow_one_offSEXP, SEXP min_one_off_par_distSEXP, SEXP scoreSEXP, SEXP gap_pSEXP, SEXP max_shiftSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::traits::input_parameter< std::vector< std::string > >::type seqs(seqsSEXP);
Rcpp::traits::input_parameter< std::vector<int> >::type abundances(abundancesSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type err(errSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type quals(qualsSEXP);
Rcpp::traits::input_parameter< std::string >::type sq(sqSEXP);
Rcpp::traits::input_parameter< std::vector<std::string> >::type pars(parsSEXP);
Rcpp::traits::input_parameter< bool >::type allow_one_off(allow_one_offSEXP);
Rcpp::traits::input_parameter< int >::type min_one_off_par_dist(min_one_off_par_distSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type score(scoreSEXP);
Rcpp::traits::input_parameter< int >::type gap(gapSEXP);
Rcpp::traits::input_parameter< bool >::type use_kmers(use_kmersSEXP);
Rcpp::traits::input_parameter< double >::type kdist_cutoff(kdist_cutoffSEXP);
Rcpp::traits::input_parameter< int >::type band_size(band_sizeSEXP);
Rcpp::traits::input_parameter< double >::type omegaA(omegaASEXP);
Rcpp::traits::input_parameter< int >::type max_clust(max_clustSEXP);
Rcpp::traits::input_parameter< double >::type min_fold(min_foldSEXP);
Rcpp::traits::input_parameter< int >::type min_hamming(min_hammingSEXP);
Rcpp::traits::input_parameter< bool >::type use_quals(use_qualsSEXP);
Rcpp::traits::input_parameter< int >::type qmax(qmaxSEXP);
Rcpp::traits::input_parameter< bool >::type final_consensus(final_consensusSEXP);
Rcpp::traits::input_parameter< bool >::type vectorized_alignment(vectorized_alignmentSEXP);
Rcpp::traits::input_parameter< int >::type homo_gap(homo_gapSEXP);
Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP);
__result = Rcpp::wrap(dada_uniques(seqs, abundances, err, quals, score, gap, use_kmers, kdist_cutoff, band_size, omegaA, max_clust, min_fold, min_hamming, use_quals, qmax, final_consensus, vectorized_alignment, homo_gap, verbose));
Rcpp::traits::input_parameter< int >::type gap_p(gap_pSEXP);
Rcpp::traits::input_parameter< int >::type max_shift(max_shiftSEXP);
__result = Rcpp::wrap(C_is_bimera(sq, pars, allow_one_off, min_one_off_par_dist, score, gap_p, max_shift));
return __result;
END_RCPP
}
Expand Down Expand Up @@ -66,23 +54,6 @@ BEGIN_RCPP
return __result;
END_RCPP
}
// C_is_bimera
bool C_is_bimera(std::string sq, std::vector<std::string> pars, bool allow_one_off, int min_one_off_par_dist, Rcpp::NumericMatrix score, int gap_p, int max_shift);
RcppExport SEXP dada2_C_is_bimera(SEXP sqSEXP, SEXP parsSEXP, SEXP allow_one_offSEXP, SEXP min_one_off_par_distSEXP, SEXP scoreSEXP, SEXP gap_pSEXP, SEXP max_shiftSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::traits::input_parameter< std::string >::type sq(sqSEXP);
Rcpp::traits::input_parameter< std::vector<std::string> >::type pars(parsSEXP);
Rcpp::traits::input_parameter< bool >::type allow_one_off(allow_one_offSEXP);
Rcpp::traits::input_parameter< int >::type min_one_off_par_dist(min_one_off_par_distSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type score(scoreSEXP);
Rcpp::traits::input_parameter< int >::type gap_p(gap_pSEXP);
Rcpp::traits::input_parameter< int >::type max_shift(max_shiftSEXP);
__result = Rcpp::wrap(C_is_bimera(sq, pars, allow_one_off, min_one_off_par_dist, score, gap_p, max_shift));
return __result;
END_RCPP
}
// C_pair_consensus
Rcpp::CharacterVector C_pair_consensus(std::string s1, std::string s2, int prefer);
RcppExport SEXP dada2_C_pair_consensus(SEXP s1SEXP, SEXP s2SEXP, SEXP preferSEXP) {
Expand Down Expand Up @@ -165,6 +136,35 @@ BEGIN_RCPP
return __result;
END_RCPP
}
// dada_uniques
Rcpp::List dada_uniques(std::vector< std::string > seqs, std::vector<int> abundances, Rcpp::NumericMatrix err, Rcpp::NumericMatrix quals, Rcpp::NumericMatrix score, int gap, bool use_kmers, double kdist_cutoff, int band_size, double omegaA, int max_clust, double min_fold, int min_hamming, bool use_quals, int qmax, bool final_consensus, bool vectorized_alignment, int homo_gap, bool verbose);
RcppExport SEXP dada2_dada_uniques(SEXP seqsSEXP, SEXP abundancesSEXP, SEXP errSEXP, SEXP qualsSEXP, SEXP scoreSEXP, SEXP gapSEXP, SEXP use_kmersSEXP, SEXP kdist_cutoffSEXP, SEXP band_sizeSEXP, SEXP omegaASEXP, SEXP max_clustSEXP, SEXP min_foldSEXP, SEXP min_hammingSEXP, SEXP use_qualsSEXP, SEXP qmaxSEXP, SEXP final_consensusSEXP, SEXP vectorized_alignmentSEXP, SEXP homo_gapSEXP, SEXP verboseSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::traits::input_parameter< std::vector< std::string > >::type seqs(seqsSEXP);
Rcpp::traits::input_parameter< std::vector<int> >::type abundances(abundancesSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type err(errSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type quals(qualsSEXP);
Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type score(scoreSEXP);
Rcpp::traits::input_parameter< int >::type gap(gapSEXP);
Rcpp::traits::input_parameter< bool >::type use_kmers(use_kmersSEXP);
Rcpp::traits::input_parameter< double >::type kdist_cutoff(kdist_cutoffSEXP);
Rcpp::traits::input_parameter< int >::type band_size(band_sizeSEXP);
Rcpp::traits::input_parameter< double >::type omegaA(omegaASEXP);
Rcpp::traits::input_parameter< int >::type max_clust(max_clustSEXP);
Rcpp::traits::input_parameter< double >::type min_fold(min_foldSEXP);
Rcpp::traits::input_parameter< int >::type min_hamming(min_hammingSEXP);
Rcpp::traits::input_parameter< bool >::type use_quals(use_qualsSEXP);
Rcpp::traits::input_parameter< int >::type qmax(qmaxSEXP);
Rcpp::traits::input_parameter< bool >::type final_consensus(final_consensusSEXP);
Rcpp::traits::input_parameter< bool >::type vectorized_alignment(vectorized_alignmentSEXP);
Rcpp::traits::input_parameter< int >::type homo_gap(homo_gapSEXP);
Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP);
__result = Rcpp::wrap(dada_uniques(seqs, abundances, err, quals, score, gap, use_kmers, kdist_cutoff, band_size, omegaA, max_clust, min_fold, min_hamming, use_quals, qmax, final_consensus, vectorized_alignment, homo_gap, verbose));
return __result;
END_RCPP
}
// C_assign_taxonomy
Rcpp::List C_assign_taxonomy(std::vector<std::string> seqs, std::vector<std::string> refs, std::vector<int> ref_to_genus, Rcpp::IntegerMatrix genusmat, bool verbose);
RcppExport SEXP dada2_C_assign_taxonomy(SEXP seqsSEXP, SEXP refsSEXP, SEXP ref_to_genusSEXP, SEXP genusmatSEXP, SEXP verboseSEXP) {
Expand Down
151 changes: 151 additions & 0 deletions src/chimera.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#include "dada.h"
#include <Rcpp.h>
using namespace Rcpp;

int get_ham_endsfree(const char *seq1, const char *seq2, int len);

//------------------------------------------------------------------
// Determines whether sq is a perfect bimera of some combination from pars.
//
// @param sq The query DNA sequence.
// @param pars Potential bimeric "parents".
// @param max_shift A \code{integer(1)} of the maximum alignment shift allowed.
//
// [[Rcpp::export]]
bool C_is_bimera(std::string sq, std::vector<std::string> pars, bool allow_one_off, int min_one_off_par_dist, Rcpp::NumericMatrix score, int gap_p, int max_shift) {
// For now only finding perfect bimeras
int i, j, left, right, left_oo, right_oo, pos, len, max_len;
// Make c-style 2d score array
int c_score[4][4];
for(i=0;i<4;i++) {
for(j=0;j<4;j++) {
c_score[i][j] = (int) score(i,j);
}
}

// Make integer-ized c-style sequence strings
char *seq1 = (char *) malloc(sq.size()+1); //E
// Get max length of the parent sequences
max_len=0;
for(i=0;i<pars.size();i++) {
len = pars[i].size();
if(len>max_len) { max_len = len; }
}
char *seq2 = (char *) malloc(max_len+1); //E
if (seq1 == NULL || seq2 == NULL) Rcpp::stop("Memory allocation failed.");
nt2int(seq1, sq.c_str());

char **al;
int max_left=0, max_right=0;
int oo_max_left=0, oo_max_right=0, oo_max_left_oo=0, oo_max_right_oo=0;

bool rval = false;
for(i=0;i<pars.size() && rval==false;i++) {
nt2int(seq2, pars[i].c_str());
al = nwalign_endsfree(seq1, seq2, c_score, gap_p, max_shift); // Remember, alignments must be freed!
len = strlen(al[0]);

pos=0; left=0;
while(al[0][pos] == 6 && pos<len) {
pos++; // Scan in until query starts
}
while(al[1][pos] == 6 && pos<max_shift) {
pos++; left++; // Credit as ends-free coverage until parent starts
}
while(pos<len && al[0][pos] == al[1][pos]) {
pos++; left++; // Credit as covered until a mismatch
}
if(allow_one_off) {
// Step forward, and credit to one-off further matches (and this mismatch if not a gap)
left_oo = left;
pos++;
if(pos<len && al[0][pos] != 6) { left_oo++; }
while(pos<len && al[0][pos] == al[1][pos]) {
pos++; left_oo++;
}
}

pos=len-1; right=0;
while(al[0][pos] == 6 && pos >= 0) {
pos--;
}
while(al[1][pos] == 6 && pos>+(len-max_shift)) {
pos--; right++;
}
while(pos>=0 && al[0][pos] == al[1][pos]) {
pos--; right++;
}
if(allow_one_off) {
// Step forward, and credit to one-off further matches (and this mismatch if not a gap)
right_oo = right;
pos--;
if(pos>=0 && al[0][pos] != 6) { right_oo++; }
while(pos>=0 && al[0][pos] == al[1][pos]) {
pos--; right_oo++;
}
}

if((left+right) >= sq.size()) { // Toss id/pure-shift/internal-indel "parents"
continue;
}
if(left > max_left) { max_left=left; }
if(right > max_right) { max_right=right; }

// Need to evaluate whether parents are allowed for one-off models
if(allow_one_off && get_ham_endsfree(al[0], al[1], len) >= min_one_off_par_dist) {
if(left > oo_max_left) { oo_max_left=left; }
if(right > oo_max_right) { oo_max_right=right; }
if(left_oo > oo_max_left_oo) { oo_max_left_oo=left_oo; }
if(right_oo > oo_max_right_oo) { oo_max_right_oo=right_oo; }
}

// Evaluate, and break if found bimeric model
if((max_right+max_left)>=sq.size()) {
rval = true;
}
if(allow_one_off) {
if((oo_max_left+oo_max_right_oo)>=sq.size() || (oo_max_left_oo+oo_max_right)>=sq.size()) {
rval=true;
}
}
free(al[0]);
free(al[1]);
free(al);
}

free(seq1);
free(seq2);
return(rval);
}

// Internal function to get hamming distance between aligned seqs
// without counting end gaps
int get_ham_endsfree(const char *seq1, const char *seq2, int len) {
int i, j, pos, ham;
bool gap1, gap2;
// Find start of internal part of alignment
i=0;
gap1 = (seq1[i]==6);
gap2 = (seq2[i]==6);
while(gap1 || gap2) {
i++;
gap1 = (gap1 && (seq1[i]==6));
gap2 = (gap2 && (seq2[i]==6));
}
// Find end of internal part of alignment
j=len-1;
gap1 = (seq1[j]==6);
gap2 = (seq2[j]==6);
while(gap1 || gap2) {
j--;
gap1 = (gap1 && (seq1[j]==6));
gap2 = (gap2 && (seq2[j]==6));
}
// Calculate hamming distance over internal part
ham=0;
for(pos=i;pos<=j;pos++) {
if(seq1[pos] != seq2[pos]) { ham++; }
}
return(ham);
}

Loading

0 comments on commit 9e7f9c9

Please sign in to comment.