From bc367f1880ae5ccc771d5780e35df4c42744a9c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 22 Sep 2024 01:49:03 -0700 Subject: [PATCH] pcre2_compile: avoid 1 byte buffer overread parsing VERBs (#487) As reported recently by ef218fb (Guard against out-of-bounds memory access when parsing LIMIT_HEAP et al (#463), 2024-09-07), a malformed pattern could result in reading 1 byte past its end. Fix a similar issue that affects all VERBs and add test cases to ensure the original bug and all its siblings are no longer an issue. While at it fix the wording of the related documentation. --- ChangeLog | 8 +++++--- doc/pcre2syntax.3 | 4 ++-- src/pcre2_compile.c | 11 +++-------- testdata/testinput2 | 8 ++++++++ testdata/testoutput2 | 12 ++++++++++++ 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2e184793e..0465b93a5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -82,21 +82,23 @@ pattern. 14. Item 43 of 10.43 was incomplete because it addressed only \z and not \Z, which was still misbehaving when matching fragments inside invalid UTF strings. -15. Octal escapes of the form \045 or \111 were not being recognized in +15. Octal escapes of the form \045 or \111 were not being recognized in substitution strings, and if encountered gave an error, though the \o{...} form was recognized. This bug is now fixed. -16. Merged PR475, which implements title casing in substitution strings a la +16. Merged PR475, which implements title casing in substitution strings a la Perl. 17. Merged PR478, which disallows \x if not followed by { or a hex digit. 18. Merged PR473, which implements Python-style backrefs in substitutions. -19. Merged PR483, which adding \g and $ to replacement strings. +19. Merged PR483, which is adding \g and $ to replacement strings. 20. Merged PR470, which adds PCRE2_EXTRA_NO_BS0 and PCRE2_EXTRA_PYTHON_OCTAL. +21. Prevent 1 byte overread when parsing malformed patterns with early VERBs. + Version 10.44 07-June-2024 -------------------------- diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 index 232125b82..db0bb6586 100644 --- a/doc/pcre2syntax.3 +++ b/doc/pcre2syntax.3 @@ -408,8 +408,8 @@ only one hyphen. Setting (but no unsetting) is allowed after (?^ for example example (?i:...). .P The following are recognized only at the very start of a pattern or after one -of the newline or \eR options with similar syntax. More than one of them may -appear. For the first three, d is a decimal number. +of the newline or \eR sequences or options with similar syntax. More than one +of them may appear. For the first three, d is a decimal number. .sp (*LIMIT_DEPTH=d) set the backtracking limit to d (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index a3367da05..936f490cd 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -10404,12 +10404,13 @@ if ((options & PCRE2_LITERAL) == 0) { for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) { - uint32_t c, pp; const pso *p = pso_list + i; if (patlen - skipatstart - 2 >= p->length && PRIV(strncmp_c8)(ptr + skipatstart + 2, p->name, p->length) == 0) { + uint32_t c, pp; + skipatstart += p->length + 2; switch(p->type) { @@ -10436,18 +10437,12 @@ if ((options & PCRE2_LITERAL) == 0) case PSO_LIMH: c = 0; pp = skipatstart; - if (!IS_DIGIT(ptr[pp])) - { - errorcode = ERR60; - ptr += pp; - goto HAD_EARLY_ERROR; - } while (pp < patlen && IS_DIGIT(ptr[pp])) { if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ c = c*10 + (ptr[pp++] - CHAR_0); } - if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS) + if (pp >= patlen || pp == skipatstart || ptr[pp] != CHAR_RIGHT_PARENTHESIS) { errorcode = ERR60; ptr += pp; diff --git a/testdata/testinput2 b/testdata/testinput2 index a869c5bc2..542d14520 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5261,6 +5261,14 @@ a)"xI /(*LIMIT_HEAP=0)xxx/I +/(*LIMIT_HEAP=123/use_length + +/(*LIMIT_MATCH=/use_length + +/(*CRLF)(*LIMIT_DEPTH=/use_length + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length + /\d{0,3}(*:abc)(?C1)xxx/callout_info # ---------------------------------------------------------------------- diff --git a/testdata/testoutput2 b/testdata/testoutput2 index bf7b7620e..b99d64781 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -16220,6 +16220,18 @@ First code unit = 'x' Last code unit = 'x' Subject length lower bound = 3 +/(*LIMIT_HEAP=123/use_length +Failed: error 160 at offset 16: (*VERB) not recognized or malformed + +/(*LIMIT_MATCH=/use_length +Failed: error 160 at offset 14: (*VERB) not recognized or malformed + +/(*CRLF)(*LIMIT_DEPTH=/use_length +Failed: error 160 at offset 21: (*VERB) not recognized or malformed + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length +Failed: error 160 at offset 34: (*VERB) not recognized or malformed + /\d{0,3}(*:abc)(?C1)xxx/callout_info Callout 1 x