Skip to content

Commit

Permalink
r671: cleanup command line options
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Jan 31, 2018
1 parent 46d6349 commit da6947c
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 64 deletions.
56 changes: 33 additions & 23 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"

#define MM_VERSION "2.7-r670-dirty"
#define MM_VERSION "2.7-r671-dirty"

#ifdef __linux__
#include <sys/resource.h>
Expand All @@ -28,7 +28,7 @@ static struct option long_options[] = {
{ "seed", required_argument, 0, 0 },
{ "no-kalloc", no_argument, 0, 0 },
{ "print-qname", no_argument, 0, 0 },
{ "no-self", no_argument, 0, 0 },
{ "no-self", no_argument, 0, 'D' },
{ "print-seeds", no_argument, 0, 0 },
{ "max-chain-skip", required_argument, 0, 0 },
{ "min-dp-len", required_argument, 0, 0 },
Expand All @@ -37,17 +37,19 @@ static struct option long_options[] = {
{ "cost-non-gt-ag", required_argument, 0, 'C' },
{ "no-long-join", no_argument, 0, 0 },
{ "sr", no_argument, 0, 0 },
{ "frag", optional_argument, 0, 0 },
{ "secondary", optional_argument, 0, 0 },
{ "frag", required_argument, 0, 0 },
{ "secondary", required_argument, 0, 0 },
{ "cs", optional_argument, 0, 0 },
{ "end-bonus", required_argument, 0, 0 },
{ "no-pairing", no_argument, 0, 0 },
{ "splice-flank", optional_argument, 0, 0 },
{ "splice-flank", required_argument, 0, 0 },
{ "idx-no-seq", no_argument, 0, 0 },
{ "end-seed-pen", required_argument, 0, 0 }, // 21
{ "for-only", no_argument, 0, 0 }, // 22
{ "rev-only", no_argument, 0, 0 }, // 23
{ "heap-sort", optional_argument, 0, 0 }, // 24
{ "heap-sort", required_argument, 0, 0 }, // 24
{ "all-chain", no_argument, 0, 'P' },
{ "dual", required_argument, 0, 0 }, // 26
{ "help", no_argument, 0, 'h' },
{ "max-intron-len", required_argument, 0, 'G' },
{ "version", no_argument, 0, 'V' },
Expand All @@ -70,9 +72,22 @@ static inline int64_t mm_parse_num(const char *str)
return (int64_t)(x + .499);
}

static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const char *arg, int yes_to_set)
{
if (yes_to_set) {
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag;
else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) opt->flag &= ~flag;
else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name);
} else {
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag &= ~flag;
else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) opt->flag |= flag;
else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name);
}
}

int main(int argc, char *argv[])
{
const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:";
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:";
mm_mapopt_t opt;
mm_idxopt_t ipt;
int i, c, n_threads = 3, long_idx;
Expand Down Expand Up @@ -111,7 +126,9 @@ int main(int argc, char *argv[])
else if (c == 'p') opt.pri_ratio = atof(optarg);
else if (c == 'M') opt.mask_level = atof(optarg);
else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR;
else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
else if (c == 'D') opt.flag |= MM_F_NO_DIAG;
else if (c == 'P') opt.flag |= MM_F_ALL_CHAINS;
else if (c == 'X') opt.flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN; // -D -P --no-long-join --dual=no
else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
else if (c == 'Q') opt.flag |= MM_F_NO_QUAL;
else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP;
Expand All @@ -133,7 +150,6 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx == 2) opt.seed = atoi(optarg); // --seed
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
else if (c == 0 && long_idx == 5) opt.flag |= MM_F_NO_SELF; // --no-self
else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed
else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip
else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len
Expand All @@ -148,13 +164,9 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only
else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only
else if (c == 0 && long_idx == 14) { // --frag
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
opt.flag |= MM_F_FRAG_MODE;
else opt.flag &= ~MM_F_FRAG_MODE;
yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 15) { // --secondary
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
opt.flag &= ~MM_F_NO_PRINT_2ND;
else opt.flag |= MM_F_NO_PRINT_2ND;
yes_or_no(&opt, MM_F_NO_PRINT_2ND, long_idx, optarg, 0);
} else if (c == 0 && long_idx == 16) { // --cs
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR;
if (optarg == 0 || strcmp(optarg, "short") == 0) {
Expand All @@ -167,13 +179,11 @@ int main(int argc, char *argv[])
fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n");
}
} else if (c == 0 && long_idx == 19) { // --splice-flank
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
opt.flag |= MM_F_SPLICE_FLANK;
else opt.flag &= ~MM_F_SPLICE_FLANK;
yes_or_no(&opt, MM_F_SPLICE_FLANK, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 24) { // --heap-sort
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
opt.flag |= MM_F_HEAP_SORT;
else opt.flag &= ~MM_F_HEAP_SORT;
yes_or_no(&opt, MM_F_HEAP_SORT, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 26) { // --dual
yes_or_no(&opt, MM_F_NO_DUAL, long_idx, optarg, 0);
} else if (c == 'S') {
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
if (mm_verbose >= 2)
Expand Down Expand Up @@ -261,8 +271,8 @@ int main(int argc, char *argv[])
fprintf(fp_help, " map-ont: -k15 (Oxford Nanopore vs reference mapping)\n");
fprintf(fp_help, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(fp_help, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(fp_help, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(fp_help, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)\n");
fprintf(fp_help, " ava-pb: -Hk19 -Xw5 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(fp_help, " ava-ont: -k15 -Xw5 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)\n");
fprintf(fp_help, " splice: long-read spliced alignment (see minimap2.1 for details)\n");
fprintf(fp_help, " sr: short single-end reads without splicing (see minimap2.1 for details)\n");
fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
Expand Down
12 changes: 6 additions & 6 deletions map.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,15 @@ static mm_match_t *collect_matches(void *km, int *_n_m, int max_occ, const mm_id
static inline int skip_seed(int flag, uint64_t r, const mm_match_t *q, const char *qname, int qlen, const mm_idx_t *mi, int *is_self)
{
*is_self = 0;
if (qname && (flag & (MM_F_NO_SELF|MM_F_AVA))) {
if (qname && (flag & (MM_F_NO_DIAG|MM_F_NO_DUAL))) {
const mm_idx_seq_t *s = &mi->seq[r>>32];
int cmp;
cmp = strcmp(qname, s->name);
if ((flag&MM_F_NO_SELF) && cmp == 0 && s->len == qlen) {
if ((flag&MM_F_NO_DIAG) && cmp == 0 && s->len == qlen) {
if ((uint32_t)r>>1 == (q->q_pos>>1)) return 1; // avoid the diagnonal anchors
if ((r&1) == (q->q_pos&1)) *is_self = 1; // this flag is used to avoid spurious extension on self chain
}
if ((flag&MM_F_AVA) && cmp > 0) // all-vs-all mode: map once
if ((flag&MM_F_NO_DUAL) && cmp > 0) // all-vs-all mode: map once
return 1;
}
if (flag & (MM_F_FOR_ONLY|MM_F_REV_ONLY)) {
Expand Down Expand Up @@ -237,11 +237,11 @@ static mm128_t *collect_seed_hits(void *km, const mm_mapopt_t *opt, int max_occ,

static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a)
{
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
if (!(opt->flag & MM_F_SPLICE) && !(opt->flag & MM_F_SR) && !(opt->flag & MM_F_NO_LJOIN))
if (!(opt->flag & (MM_F_SPLICE|MM_F_SR|MM_F_NO_LJOIN))) // long join not working well without primary chains
mm_join_long(km, opt, qlen, n_regs, regs, a);
}
}
Expand All @@ -250,7 +250,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
{
if (!(opt->flag & MM_F_CIGAR)) return regs;
regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
if (!(opt->flag & MM_F_AVA)) {
if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
mm_set_sam_pri(*n_regs, regs);
Expand Down
29 changes: 15 additions & 14 deletions minimap.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@
#include <stdio.h>
#include <sys/types.h>

#define MM_F_NO_SELF 0x001
#define MM_F_AVA 0x002
#define MM_F_CIGAR 0x004
#define MM_F_OUT_SAM 0x008
#define MM_F_NO_QUAL 0x010
#define MM_F_OUT_CG 0x020
#define MM_F_OUT_CS 0x040
#define MM_F_SPLICE 0x080 // splice mode
#define MM_F_SPLICE_FOR 0x100 // match GT-AG
#define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
#define MM_F_NO_LJOIN 0x400
#define MM_F_OUT_CS_LONG 0x800
#define MM_F_SR 0x1000
#define MM_F_FRAG_MODE 0x2000
#define MM_F_NO_DIAG 0x001 // no exact diagonal hit
#define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
#define MM_F_CIGAR 0x004
#define MM_F_OUT_SAM 0x008
#define MM_F_NO_QUAL 0x010
#define MM_F_OUT_CG 0x020
#define MM_F_OUT_CS 0x040
#define MM_F_SPLICE 0x080 // splice mode
#define MM_F_SPLICE_FOR 0x100 // match GT-AG
#define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
#define MM_F_NO_LJOIN 0x400
#define MM_F_OUT_CS_LONG 0x800
#define MM_F_SR 0x1000
#define MM_F_FRAG_MODE 0x2000
#define MM_F_NO_PRINT_2ND 0x4000
#define MM_F_2_IO_THREADS 0x8000
#define MM_F_LONG_CIGAR 0x10000
Expand All @@ -28,6 +28,7 @@
#define MM_F_FOR_ONLY 0x100000
#define MM_F_REV_ONLY 0x200000
#define MM_F_HEAP_SORT 0x400000
#define MM_F_ALL_CHAINS 0x800000

#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2
Expand Down
57 changes: 38 additions & 19 deletions minimap2.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH minimap2 1 "26 January 2018" "minimap2-2.7-dirty (r664)" "Bioinformatics tools"
.TH minimap2 1 "31 January 2018" "minimap2-2.8-dirty (r671)" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
Expand Down Expand Up @@ -129,7 +129,7 @@ Ignore top
fraction of most frequent minimizers [0.0002]
.TP
.BI -g \ INT
Stop chain enlongation if there are no minimizers in
Stop chain enlongation if there are no minimizers within
.IR INT -bp
[10000].
.TP
Expand All @@ -148,11 +148,28 @@ Discard chains with chaining score
[40]. Chaining score equals the approximate number of matching bases minus a
concave gap penalty. It is computed with dynamic programming.
.TP
.B -D
If query sequence name/length are identical to the target name/length, ignore
diagonal anchors. This option also reduces DP-based extension along the
diagonal.
.TP
.B -P
Retain all chains and don't attempt to set primary chains. Options
.B -p
and
.B -N
have no effect when this option is in use.
.TP
.BR --dual = yes | no
During chaining, whether to skip pairs wherein the query name is
lexicographically greater than the target name [yes]
.TP
.B -X
Perform all-vs-all mapping. In this mode, if the query sequence name is
lexicographically larger than the target sequence name, the hits between them
will be suppressed; if the query sequence name is the same as the target name,
diagonal minimizer hits will also be suppressed.
Equivalent to
.RB ' -DP
.BR --dual = no
.BR --no-long-join '.
Primarily used for all-vs-all read overlapping.
.TP
.BI -p \ FLOAT
Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
Expand All @@ -162,6 +179,9 @@ the chain with a lower score is secondary to the chain with a higher score.
If the ratio of the scores is below
.IR FLOAT ,
the secondary chain will not be outputted or extended with DP alignment later.
This option has no effect when
.B -X
is applied.
.TP
.BI -N \ INT
Output at most
Expand All @@ -179,7 +199,7 @@ Increasing this option slows down spliced alignment. [200k]
.TP
.BI -F \ NUM
Maximum fragment length (aka insert size; effective with
.BR -xsr / --frag)
.BR -xsr / --frag = yes )
[800]
.TP
.BI -M \ FLOAT
Expand Down Expand Up @@ -209,7 +229,7 @@ applies a second round of chaining with a higher minimizer occurrence threshold
if no good chain is found. In addition, minimap2 attempts to patch gaps between
seeds with ungapped alignment.
.TP
.BR --frag [= no | yes ]
.BR --frag = no | yes
Whether to enable the fragment mode [no]
.TP
.B --for-only
Expand All @@ -220,7 +240,7 @@ strand of the reference and the second read to the reverse stand.
.B --rev-only
Only map to the reverse complement strand of the reference sequences.
.TP
.BR --heap-sort [= no | yes ]
.BR --heap-sort = no | yes
If yes, sort anchors with heap merge, instead of radix sort. Heap merge is
faster for short reads, but slower for long reads. [no]
.SS Alignment options
Expand Down Expand Up @@ -272,14 +292,13 @@ no attempt to match GT-AG [n]
.BI --end-bonus \ INT
Score bonus when alignment extends to the end of the query sequence [0].
.TP
.BR --splice-flank [= yes | no ]
.BR --splice-flank = yes | no
Assume the next base to a
.B GT
donor site tends to be A/G (91% in human and 92% in mouse) and the preceding
base to a
.B AG
acceptor tends to be C/T [yes with
.BR --splice ].
acceptor tends to be C/T [no].
This trend is evolutionarily conservative, all the way to S. cerevisiae
(PMID:18688272). Specifying this option generally leads to higher junction
accuracy by several percents, so it is applied by default with
Expand Down Expand Up @@ -369,7 +388,7 @@ K/M/G/k/m/g suffix is accepted. A large
helps load balancing in the multi-threading mode, at the cost of increased
memory.
.TP
.BR --secondary [= yes | no ]
.BR --secondary = yes | no
Whether to output secondary alignments [yes]
.TP
.B --version
Expand Down Expand Up @@ -416,13 +435,13 @@ Up to 10% sequence divergence.
.B ava-pb
PacBio all-vs-all overlap mapping
.RB ( -Hk19
.B -w5 -Xp0 -m100 -g10000 --max-chain-skip
.B -Xw5 -m100 -g10000 --max-chain-skip
.BR 25 ).
.TP
.B ava-ont
Oxford Nanopore all-vs-all overlap mapping
.RB ( -k15
.B -w5 -Xp0 -m100 -g10000 --max-chain-skip
.B -Xw5 -m100 -g10000 --max-chain-skip
.BR 25 ).
Similarly, the major difference from
.B ava-pb
Expand All @@ -444,8 +463,8 @@ tag ignores introns to demote hits to pseudogenes.
.B sr
Short single-end reads without splicing
.RB ( -k21
.B -w11 --sr --frag -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20
.B -s40 -g200 -2K50m --heap-sort
.B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20
.B -s40 -g200 -2K50m --heap-sort=yes
.BR --secondary=no ).
.RE
.SS Miscellaneous options
Expand Down Expand Up @@ -539,8 +558,8 @@ where seed positions may be suboptimal. This should not be a big concern
because even the optimal alignment may be wrong in such regions.
.TP
*
Minimap2 requires SSE2 instructions to compile. It is possible to add
non-SSE2 support, but it would make minimap2 slower by several times.
Minimap2 requires SSE2 or NEON instructions to compile. It is possible to add
non-SSE2/NEON support, but it would make minimap2 slower by several times.
.SH SEE ALSO
.PP
miniasm(1), minimap(1), bwa(1).
4 changes: 2 additions & 2 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
mm_mapopt_init(mo);
} else if (strcmp(preset, "ava-ont") == 0) {
io->flag = 0, io->k = 15, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
} else if (strcmp(preset, "ava-pb") == 0) {
io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF;
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
io->flag |= MM_I_HPC, io->k = 19;
Expand Down

0 comments on commit da6947c

Please sign in to comment.