diff --git a/ChangeLog b/ChangeLog index 2e184793e..0465b93a5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -82,21 +82,23 @@ pattern. 14. Item 43 of 10.43 was incomplete because it addressed only \z and not \Z, which was still misbehaving when matching fragments inside invalid UTF strings. -15. Octal escapes of the form \045 or \111 were not being recognized in +15. Octal escapes of the form \045 or \111 were not being recognized in substitution strings, and if encountered gave an error, though the \o{...} form was recognized. This bug is now fixed. -16. Merged PR475, which implements title casing in substitution strings a la +16. Merged PR475, which implements title casing in substitution strings a la Perl. 17. Merged PR478, which disallows \x if not followed by { or a hex digit. 18. Merged PR473, which implements Python-style backrefs in substitutions. -19. Merged PR483, which adding \g and $ to replacement strings. +19. Merged PR483, which adds \g and $ to replacement strings. 20. Merged PR470, which adds PCRE2_EXTRA_NO_BS0 and PCRE2_EXTRA_PYTHON_OCTAL. +21. Prevent a 1-byte overread when parsing malformed patterns with early VERBs. + Version 10.44 07-June-2024 -------------------------- diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 index 232125b82..db0bb6586 100644 --- a/doc/pcre2syntax.3 +++ b/doc/pcre2syntax.3 @@ -408,8 +408,8 @@ only one hyphen. Setting (but no unsetting) is allowed after (?^ for example example (?i:...). .P The following are recognized only at the very start of a pattern or after one -of the newline or \eR options with similar syntax. More than one of them may -appear. For the first three, d is a decimal number. +of the newline or \eR sequences or options with similar syntax. More than one +of them may appear. For the first three, d is a decimal number. 
.sp (*LIMIT_DEPTH=d) set the backtracking limit to d (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index a3367da05..936f490cd 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -10404,12 +10404,13 @@ if ((options & PCRE2_LITERAL) == 0) { for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) { - uint32_t c, pp; const pso *p = pso_list + i; if (patlen - skipatstart - 2 >= p->length && PRIV(strncmp_c8)(ptr + skipatstart + 2, p->name, p->length) == 0) { + uint32_t c, pp; + skipatstart += p->length + 2; switch(p->type) { @@ -10436,18 +10437,12 @@ if ((options & PCRE2_LITERAL) == 0) case PSO_LIMH: c = 0; pp = skipatstart; - if (!IS_DIGIT(ptr[pp])) - { - errorcode = ERR60; - ptr += pp; - goto HAD_EARLY_ERROR; - } while (pp < patlen && IS_DIGIT(ptr[pp])) { if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ c = c*10 + (ptr[pp++] - CHAR_0); } - if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS) + if (pp >= patlen || pp == skipatstart || ptr[pp] != CHAR_RIGHT_PARENTHESIS) { errorcode = ERR60; ptr += pp; diff --git a/testdata/testinput2 b/testdata/testinput2 index a869c5bc2..542d14520 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5261,6 +5261,14 @@ a)"xI /(*LIMIT_HEAP=0)xxx/I +/(*LIMIT_HEAP=123/use_length + +/(*LIMIT_MATCH=/use_length + +/(*CRLF)(*LIMIT_DEPTH=/use_length + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length + /\d{0,3}(*:abc)(?C1)xxx/callout_info # ---------------------------------------------------------------------- diff --git a/testdata/testoutput2 b/testdata/testoutput2 index bf7b7620e..b99d64781 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -16220,6 +16220,18 @@ First code unit = 'x' Last code unit = 'x' Subject length lower bound = 3 +/(*LIMIT_HEAP=123/use_length +Failed: error 160 at offset 16: (*VERB) not recognized or malformed + +/(*LIMIT_MATCH=/use_length +Failed: error 160 at offset 14: (*VERB) not recognized or malformed + 
+/(*CRLF)(*LIMIT_DEPTH=/use_length +Failed: error 160 at offset 21: (*VERB) not recognized or malformed + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length +Failed: error 160 at offset 34: (*VERB) not recognized or malformed + /\d{0,3}(*:abc)(?C1)xxx/callout_info Callout 1 x