From cacd570967668cbb157cbf4fab508c9b597ceb1b Mon Sep 17 00:00:00 2001
From: Alex Dowad pcre2pattern man page
PCRE2 has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow.
-This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
+This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS).
diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3
index 84e4aff47..b0936c91a 100644
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@@ -2242,7 +2242,7 @@ package, and PCRE1 copied it from there. It found its way into Perl at release
PCRE2 has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow.
-This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
+This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS).
.P
When a pattern contains an unlimited repeat inside a group that can itself be
diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic
index a3341e6f5..0896b72ca 100644
--- a/src/pcre2.h.generic
+++ b/src/pcre2.h.generic
@@ -464,6 +464,18 @@ released, the numbers must not be changed. */
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
#define PCRE2_CONFIG_TABLES_LENGTH 15
+/* Optimization directives for pcre2_set_optimize().
+For binary compatibility, only add to this list; do not renumber. */
+
+#define PCRE2_OPTIMIZATION_NONE 0
+#define PCRE2_OPTIMIZATION_FULL 1
+
+#define PCRE2_AUTO_POSSESS 64
+#define PCRE2_AUTO_POSSESS_OFF 65
+#define PCRE2_DOTSTAR_ANCHOR 66
+#define PCRE2_DOTSTAR_ANCHOR_OFF 67
+#define PCRE2_START_OPTIMIZE 68
+#define PCRE2_START_OPTIMIZE_OFF 69
/* Types for code units in patterns and subject strings. */
@@ -617,7 +629,9 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
- int (*)(uint32_t, void *), void *);
+ int (*)(uint32_t, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_optimize(pcre2_compile_context *, uint32_t);
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
@@ -912,6 +926,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
+#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index a19313c9e..9595a8540 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -464,6 +464,18 @@ released, the numbers must not be changed. */
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
#define PCRE2_CONFIG_TABLES_LENGTH 15
+/* Optimization directives for pcre2_set_optimize().
+For binary compatibility, only add to this list; do not renumber. */
+
+#define PCRE2_OPTIMIZATION_NONE 0
+#define PCRE2_OPTIMIZATION_FULL 1
+
+#define PCRE2_AUTO_POSSESS 64
+#define PCRE2_AUTO_POSSESS_OFF 65
+#define PCRE2_DOTSTAR_ANCHOR 66
+#define PCRE2_DOTSTAR_ANCHOR_OFF 67
+#define PCRE2_START_OPTIMIZE 68
+#define PCRE2_START_OPTIMIZE_OFF 69
/* Types for code units in patterns and subject strings. */
@@ -617,7 +629,9 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
- int (*)(uint32_t, void *), void *);
+ int (*)(uint32_t, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_optimize(pcre2_compile_context *, uint32_t);
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
@@ -912,6 +926,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
+#define pcre2_set_optimize PCRE2_SUFFIX(pcre2_set_optimize_)
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 48dae18fa..1e787e952 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -834,7 +834,8 @@ enum { PSO_OPT, /* Value is an option bit */
PSO_BSR, /* Value is a \R type */
PSO_LIMH, /* Read integer value for heap limit */
PSO_LIMM, /* Read integer value for match limit */
- PSO_LIMD /* Read integer value for depth limit */
+ PSO_LIMD, /* Read integer value for depth limit */
+ PSO_OPTMZ /* Value is an optimization bit */
};
typedef struct pso {
@@ -852,10 +853,10 @@ static const pso pso_list[] = {
{ STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
{ STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
{ STRING_NOTEMPTY_ATSTART_RIGHTPAR, 17, PSO_FLG, PCRE2_NE_ATST_SET },
- { STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
- { STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
+ { STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPTMZ, PCRE2_OPTIM_AUTO_POSSESS },
+ { STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPTMZ, PCRE2_OPTIM_DOTSTAR_ANCHOR },
{ STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
- { STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
+ { STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPTMZ, PCRE2_OPTIM_START_OPTIMIZE },
{ STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 },
{ STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
{ STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
@@ -8883,13 +8884,14 @@ this prevents the number of characters it matches from being adjusted.
cb points to the compile data block
atomcount atomic group level
inassert TRUE if in an assertion
+ dotstar_anchor TRUE if automatic anchoring optimization is enabled
Returns: TRUE or FALSE
*/
static BOOL
is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb,
- int atomcount, BOOL inassert)
+ int atomcount, BOOL inassert, BOOL dotstar_anchor)
{
do {
PCRE2_SPTR scode = first_significant_code(
@@ -8901,7 +8903,7 @@ do {
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
+ if (!is_anchored(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor))
return FALSE;
}
@@ -8912,14 +8914,14 @@ do {
{
int n = GET2(scode, 1+LINK_SIZE);
uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1);
- if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
+ if (!is_anchored(scode, new_map, cb, atomcount, inassert, dotstar_anchor)) return FALSE;
}
/* Positive forward assertion */
else if (op == OP_ASSERT || op == OP_ASSERT_NA)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
+ if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor)) return FALSE;
}
/* Condition. If there is no second branch, it can't be anchored. */
@@ -8927,7 +8929,7 @@ do {
else if (op == OP_COND || op == OP_SCOND)
{
if (scode[GET(scode,1)] != OP_ALT) return FALSE;
- if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
+ if (!is_anchored(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor))
return FALSE;
}
@@ -8935,7 +8937,7 @@ do {
else if (op == OP_ONCE)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert))
+ if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert, dotstar_anchor))
return FALSE;
}
@@ -8950,8 +8952,7 @@ do {
op == OP_TYPEPOSSTAR))
{
if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 ||
- atomcount > 0 || cb->had_pruneorskip || inassert ||
- (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
+ atomcount > 0 || cb->had_pruneorskip || inassert || !dotstar_anchor)
return FALSE;
}
@@ -8988,13 +8989,14 @@ or *SKIP does not count, because once again the assumption no longer holds.
cb points to the compile data
atomcount atomic group level
inassert TRUE if in an assertion
+ dotstar_anchor TRUE if automatic anchoring optimization is enabled
Returns: TRUE or FALSE
*/
static BOOL
is_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
- int atomcount, BOOL inassert)
+ int atomcount, BOOL inassert, BOOL dotstar_anchor)
{
do {
PCRE2_SPTR scode = first_significant_code(
@@ -9025,7 +9027,8 @@ do {
return FALSE;
default: /* Assertion */
- if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
+ if (!is_startline(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor))
+ return FALSE;
do scode += GET(scode, 1); while (*scode == OP_ALT);
scode += 1 + LINK_SIZE;
break;
@@ -9039,7 +9042,7 @@ do {
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
{
- if (!is_startline(scode, bracket_map, cb, atomcount, inassert))
+ if (!is_startline(scode, bracket_map, cb, atomcount, inassert, dotstar_anchor))
return FALSE;
}
@@ -9050,14 +9053,15 @@ do {
{
int n = GET2(scode, 1+LINK_SIZE);
unsigned int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
- if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE;
+ if (!is_startline(scode, new_map, cb, atomcount, inassert, dotstar_anchor))
+ return FALSE;
}
/* Positive forward assertions */
else if (op == OP_ASSERT || op == OP_ASSERT_NA)
{
- if (!is_startline(scode, bracket_map, cb, atomcount, TRUE))
+ if (!is_startline(scode, bracket_map, cb, atomcount, TRUE, dotstar_anchor))
return FALSE;
}
@@ -9065,7 +9069,7 @@ do {
else if (op == OP_ONCE)
{
- if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert))
+ if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert, dotstar_anchor))
return FALSE;
}
@@ -9079,8 +9083,7 @@ do {
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
{
if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 ||
- atomcount > 0 || cb->had_pruneorskip || inassert ||
- (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
+ atomcount > 0 || cb->had_pruneorskip || inassert || !dotstar_anchor)
return FALSE;
}
@@ -10362,6 +10365,10 @@ int regexrc; /* Return from compile */
uint32_t i; /* Local loop counter */
+/* Enable all optimizations by default. */
+uint32_t optim_flags = ccontext != NULL ? ccontext->optimization_flags :
+ PCRE2_OPTIMIZATION_ALL;
+
/* Comments at the head of this file explain about these variables. */
uint32_t stack_groupinfo[GROUPINFO_DEFAULT_SIZE];
@@ -10432,6 +10439,18 @@ if (patlen > ccontext->max_pattern_length)
return NULL;
}
+/* Optimization flags in 'options' can override those in the compile context.
+This is because some options to disable optimizations were added before the
+optimization flags word existed, and we need to continue supporting them
+for backwards compatibility. */
+
+if (options & PCRE2_NO_AUTO_POSSESS)
+ optim_flags &= ~PCRE2_OPTIM_AUTO_POSSESS;
+if (options & PCRE2_NO_DOTSTAR_ANCHOR)
+ optim_flags &= ~PCRE2_OPTIM_DOTSTAR_ANCHOR;
+if (options & PCRE2_NO_START_OPTIMIZE)
+ optim_flags &= ~PCRE2_OPTIM_START_OPTIMIZE;
+
/* From here on, all returns from this function should end up going via the
EXIT label. */
@@ -10568,6 +10587,32 @@ if ((options & PCRE2_LITERAL) == 0)
else limit_depth = c;
skipatstart = ++pp;
break;
+
+ case PSO_OPTMZ:
+ optim_flags &= ~(p->value);
+
+ /* For backward compatibility the three original VERBs to disable
+ optimizations need to also update the corresponding external option. */
+
+ switch(p->value)
+ {
+ case PCRE2_OPTIM_AUTO_POSSESS:
+ cb.external_options |= PCRE2_NO_AUTO_POSSESS;
+ break;
+
+ case PCRE2_OPTIM_DOTSTAR_ANCHOR:
+ cb.external_options |= PCRE2_NO_DOTSTAR_ANCHOR;
+ break;
+
+ case PCRE2_OPTIM_START_OPTIMIZE:
+ cb.external_options |= PCRE2_NO_START_OPTIMIZE;
+ break;
+ }
+
+ break;
+
+ default:
+ PCRE2_UNREACHABLE();
}
break; /* Out of the table scan loop */
}
@@ -10863,6 +10908,7 @@ re->top_bracket = 0;
re->top_backref = 0;
re->name_entry_size = cb.name_entry_size;
re->name_count = cb.names_found;
+re->optimization_flags = optim_flags;
/* The basic block is immediately followed by the name table, and the compiled
code follows after that. */
@@ -11005,7 +11051,7 @@ used in this code because at least one compiler gives a warning about loss of
"const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the
function call. */
-if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
+if (errorcode == 0 && (optim_flags & PCRE2_OPTIM_AUTO_POSSESS))
{
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80;
@@ -11022,17 +11068,17 @@ there are no occurrences of *PRUNE or *SKIP (though there is an option to
disable this case). */
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
- is_anchored(codestart, 0, &cb, 0, FALSE))
+ is_anchored(codestart, 0, &cb, 0, FALSE, (optim_flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0))
re->overall_options |= PCRE2_ANCHORED;
/* Set up the first code unit or startline flag, the required code unit, and
-then study the pattern. This code need not be obeyed if PCRE2_NO_START_OPTIMIZE
-is set, as the data it would create will not be used. Note that a first code
+then study the pattern. This code need not be obeyed if PCRE2_OPTIM_START_OPTIMIZE
+is disabled, as the data it would create will not be used. Note that a first code
unit (but not the startline flag) is useful for anchored patterns because it
can still give a quick "no match" and also avoid searching for a last code
unit. */
-if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
+if (optim_flags & PCRE2_OPTIM_START_OPTIMIZE)
{
int minminlength = 0; /* For minimal minlength from first/required CU */
@@ -11096,7 +11142,7 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
that disables this case.) */
else if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
- is_startline(codestart, 0, &cb, 0, FALSE))
+ is_startline(codestart, 0, &cb, 0, FALSE, (optim_flags & PCRE2_OPTIM_DOTSTAR_ANCHOR) != 0))
re->flags |= PCRE2_STARTLINE;
/* Handle the "required code unit", if one is set. In the UTF case we can
diff --git a/src/pcre2_context.c b/src/pcre2_context.c
index 84a967d7a..382489c8b 100644
--- a/src/pcre2_context.c
+++ b/src/pcre2_context.c
@@ -141,7 +141,8 @@ pcre2_compile_context PRIV(default_compile_context) = {
NEWLINE_DEFAULT, /* Newline convention */
PARENS_NEST_LIMIT, /* As it says */
0, /* Extra options */
- MAX_VARLOOKBEHIND /* As it says */
+ MAX_VARLOOKBEHIND, /* As it says */
+ PCRE2_OPTIMIZATION_ALL /* All optimizations enabled */
};
/* The create function copies the default into the new memory, but must
@@ -409,6 +410,29 @@ ccontext->stack_guard_data = user_data;
return 0;
}
+/* Apply one optimization directive to a compile context. Returns 0 on success,
+or PCRE2_ERROR_BADOPTION (context left unchanged) if the directive is unknown. */
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive)
+{
+if (directive == PCRE2_OPTIMIZATION_NONE)
+  ccontext->optimization_flags = 0;
+else if (directive == PCRE2_OPTIMIZATION_FULL)
+  ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL;
+else if (directive >= PCRE2_AUTO_POSSESS && directive <= PCRE2_START_OPTIMIZE_OFF)
+  {
+  /* Even directive numbers switch a bit on, odd numbers switch a bit off.
+   * 64-65 affect the LSB, 66-67 the 2 bit, 68-69 the 4 bit, and so on. */
+  uint32_t bit = 1u << ((directive >> 1) - 32);
+  if (directive & 0x1)
+    ccontext->optimization_flags &= ~bit;
+  else
+    ccontext->optimization_flags |= bit;
+  }
+else
+  return PCRE2_ERROR_BADOPTION;
+return 0;
+}
/* ------------ Match context ------------ */
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 3e34c7ca5..9f44b0d58 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -3432,7 +3432,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
-way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
+way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
transferred to the options for this function. The bits are guaranteed to be
@@ -3699,7 +3699,7 @@ for (;;)
these, for testing and for ensuring that all callouts do actually occur.
The optimizations must also be avoided when restarting a DFA match. */
- if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
+ if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) &&
(options & PCRE2_DFA_RESTART) == 0)
{
/* If firstline is TRUE, the start of the match is constrained to the first
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index 043d2c563..1b9bdc6a1 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -609,6 +609,13 @@ total length of the tables. */
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
#define TABLES_LENGTH (ctypes_offset + 256)
+/* Private flags used in compile_context.optimization_flags */
+
+#define PCRE2_OPTIM_AUTO_POSSESS 0x00000001u
+#define PCRE2_OPTIM_DOTSTAR_ANCHOR 0x00000002u
+#define PCRE2_OPTIM_START_OPTIMIZE 0x00000004u
+
+#define PCRE2_OPTIMIZATION_ALL 0x00000007u
/* -------------------- Character and string names ------------------------ */
diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
index a798cdd4f..6c14be8dc 100644
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@@ -579,6 +579,7 @@ typedef struct pcre2_real_compile_context {
uint32_t parens_nest_limit;
uint32_t extra_options;
uint32_t max_varlookbehind;
+ uint32_t optimization_flags;
} pcre2_real_compile_context;
/* The real match context structure. */
@@ -646,6 +647,7 @@ typedef struct pcre2_real_code {
uint16_t top_backref; /* Highest numbered back reference */
uint16_t name_entry_size; /* Size (code units) of table entries */
uint16_t name_count; /* Number of name entries in the table */
+ uint32_t optimization_flags; /* Optimizations enabled at compile time */
} pcre2_real_code;
/* The real match data structure. Define ovector as large as it can ever
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 5de4666d1..78ba5067c 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -14474,7 +14474,7 @@ if (!check_opcode_types(common, common->start, ccend))
}
/* Checking flags and updating ovector_start. */
-if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
+if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE))
{
common->req_char_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
@@ -14534,7 +14534,7 @@ memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
-if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
+if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) && !common->has_skip_in_assert_back)
detect_early_fail(common, common->start, &private_data_size, 0, 0);
set_private_data_ptrs(common, &private_data_size, ccend);
@@ -14600,7 +14600,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
mainloop_label = mainloop_entry(common);
continue_match_label = LABEL();
/* Forward search if possible. */
- if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
+ if (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE)
{
if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
;
@@ -14615,7 +14615,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
else
continue_match_label = LABEL();
-if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
+if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE))
{
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index f55410394..54b7232f3 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6788,7 +6788,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
-way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
+way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which we now
transfer to the options for this function. The bits are guaranteed to be
@@ -7326,7 +7326,7 @@ for(;;)
However, there is an option (settable at compile time) that disables these,
for testing and for ensuring that all callouts do actually occur. */
- if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
+ if (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE)
{
/* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. That is, the match must be before or at the
diff --git a/src/pcre2test.c b/src/pcre2test.c
index d8f5d6483..1379aec0e 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -468,6 +468,7 @@ enum { MOD_CTC, /* Applies to a compile context */
MOD_NL, /* Is a newline value */
MOD_NN, /* Is a number or a name; more than one may occur */
MOD_OPT, /* Is an option bit */
+ MOD_OPTMZ, /* Is an optimization directive */
MOD_SIZ, /* Is a PCRE2_SIZE value */
MOD_STR }; /* Is a string */
@@ -661,6 +662,8 @@ static modstruct modlist[] = {
{ "ascii_digit", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_DIGIT, CO(extra_options) },
{ "ascii_posix", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_POSIX, CO(extra_options) },
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
+ { "auto_possess", MOD_CTC, MOD_OPTMZ, PCRE2_AUTO_POSSESS, 0 },
+ { "auto_possess_off", MOD_CTC, MOD_OPTMZ, PCRE2_AUTO_POSSESS_OFF, 0 },
{ "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
{ "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
{ "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
@@ -688,6 +691,8 @@ static modstruct modlist[] = {
{ "disable_recurseloop_check", MOD_DAT, MOD_OPT, PCRE2_DISABLE_RECURSELOOP_CHECK, DO(options) },
{ "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
{ "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
+ { "dotstar_anchor", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR, 0 },
+ { "dotstar_anchor_off", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR_OFF, 0 },
{ "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
{ "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
{ "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
@@ -744,6 +749,8 @@ static modstruct modlist[] = {
{ "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) },
{ "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
{ "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
+ { "optimization_full", MOD_CTC, MOD_OPTMZ, PCRE2_OPTIMIZATION_FULL, 0 },
+ { "optimization_none", MOD_CTC, MOD_OPTMZ, PCRE2_OPTIMIZATION_NONE, 0 },
{ "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
{ "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
{ "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
@@ -760,6 +767,8 @@ static modstruct modlist[] = {
{ "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
{ "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
+ { "start_optimize", MOD_CTC, MOD_OPTMZ, PCRE2_START_OPTIMIZE, 0 },
+ { "start_optimize_off", MOD_CTC, MOD_OPTMZ, PCRE2_START_OPTIMIZE_OFF, 0 },
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
{ "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
{ "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
@@ -3884,7 +3893,7 @@ for (;;)
when needed. */
m = modlist + index; /* Save typing */
- if (m->type != MOD_CTL && m->type != MOD_OPT &&
+ if (m->type != MOD_CTL && m->type != MOD_OPT && m->type != MOD_OPTMZ &&
(m->type != MOD_IND || *pp == '='))
{
if (*pp++ != '=')
@@ -3925,6 +3934,21 @@ for (;;)
else *((uint32_t *)field) |= m->value;
break;
+ case MOD_OPTMZ:
+#ifdef SUPPORT_PCRE2_8
+ if (test_mode == PCRE8_MODE)
+ pcre2_set_optimize_8((pcre2_compile_context_8*)field, m->value);
+#endif
+#ifdef SUPPORT_PCRE2_16
+ if (test_mode == PCRE16_MODE)
+ pcre2_set_optimize_16((pcre2_compile_context_16*)field, m->value);
+#endif
+#ifdef SUPPORT_PCRE2_32
+ if (test_mode == PCRE32_MODE)
+ pcre2_set_optimize_32((pcre2_compile_context_32*)field, m->value);
+#endif
+ break;
+
case MOD_BSR:
if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
{
@@ -4361,6 +4385,33 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
}
+/*************************************************
+* Show optimization flags *
+*************************************************/
+
+/*
+Arguments:
+ flags an options word
+ before text to print before
+ after text to print after
+
+Returns: nothing
+*/
+
+static void
+show_optimize_flags(uint32_t flags, const char *before, const char *after)
+{
+if (flags == 0) fprintf(outfile, "%s