Skip to content

Commit

Permalink
(Polishing) tweak
Browse files Browse the repository at this point in the history
  • Loading branch information
jamshed committed Jan 29, 2023
1 parent aff9755 commit e3a7c13
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion include/Build_Params.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class Build_Params
const std::string output_file_path_; // Path to the output file.
const std::optional<cuttlefish::Output_Format> output_format_; // Output format (0: FASTA, 1: GFAv1, 2: GFAv2, 3: GFA-reduced).
const bool track_short_seqs_; // Whether to track input sequences shorter than `k` bases.
const bool poly_n_stretch_; // Whether to include tiles in GFA-reduced output that track the polyN stretches in the input.
const bool poly_n_stretch_; // Whether to include tiles in GFA-reduced output that track the polyN stretches in the input.
const std::string working_dir_path_; // Path to the working directory (for temporary files).
const bool path_cover_; // Whether to extract a maximal path cover of the de Bruijn graph.
const bool save_mph_; // Option to save the MPH over the vertex set of the de Bruijn graph.
Expand Down
4 changes: 2 additions & 2 deletions include/Oriented_Unitig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ class Oriented_Unitig

uint64_t unitig_id;
cuttlefish::dir_t dir;
size_t start_kmer_idx; // Position, on the ref, of the last kmer of the unitig occurrence
size_t end_kmer_idx; // Position, on the ref, of the first kmer of the unitig occurrence
size_t start_kmer_idx; // Position, on the ref, of the first k-mer of the unitig occurrence
size_t end_kmer_idx; // Position, on the ref, of the last k-mer of the unitig occurrence

constexpr static uint64_t INVALID_ID = std::numeric_limits<uint64_t>::max();

Expand Down
3 changes: 1 addition & 2 deletions src/CdBG_GFA_Reduced_Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,8 @@ void CdBG<k>::write_sequence_tiling(Job_Queue<std::string, Oriented_Unitig>& job
// Write the path members.
output << "\t";

if(poly_n_stretch and left_unitig.start_kmer_idx > 0) {
if(poly_n_stretch && left_unitig.start_kmer_idx > 0)
output << "N" << left_unitig.start_kmer_idx << " ";
}

// The first vertex of the path (not inferrable from the path output files).
output << left_unitig.unitig_id << (left_unitig.dir == cuttlefish::FWD ? "+" : "-");
Expand Down
17 changes: 9 additions & 8 deletions src/CdBG_GFA_Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,24 +601,25 @@ void CdBG<k>::append_edge_to_path(const uint16_t thread_id, const Oriented_Uniti
// The destination vertex (unitig) is written for each edge.
// Note that, the very first vertex of the path tiling for the sequence is thus missing in the path outputs.

(void)left_unitig;

std::string& buffer = path_buffer[thread_id];
const bool poly_n_stretch = params.poly_n_stretch();
const int64_t pk = static_cast<int64_t>(params.k());

// Encode polyN stretch
// *.{start, end}_kmer_idx store the position of the k-mer on the ref.
if( poly_n_stretch and ((left_unitig.end_kmer_idx + 1) != right_unitig.start_kmer_idx)) {
if(poly_n_stretch && ((left_unitig.end_kmer_idx + 1) != right_unitig.start_kmer_idx))
{
const int64_t nuc_gap = static_cast<int64_t>(right_unitig.start_kmer_idx) - static_cast<int64_t>(left_unitig.end_kmer_idx);

// If left_unitig starts at offset 0 and ends at offset k - 1, the smallest polyN stretch of len 1 starts
// with index k, and right unitig must start at k+1.
if ( nuc_gap < (pk + 1)) {
// with index k, and right unitig must start at k + 1.
if(nuc_gap < (k + 1))
{
// Any difference in idxs < k+1 must be invalid.
std::cerr << "ERROR: invalid polyN gap length\n";
std::cerr << "ERROR: invalid polyN gap length.\n";
std::exit(EXIT_FAILURE);
}
size_t polyn_gap = static_cast<size_t>(nuc_gap - pk);

const size_t polyn_gap = static_cast<size_t>(nuc_gap - k);
buffer += " ";
buffer += "N";
buffer += fmt::format_int(polyn_gap).c_str();
Expand Down

0 comments on commit e3a7c13

Please sign in to comment.