From cd4c0e3fc172fd41dd6b74a79dea21fee39cbc43 Mon Sep 17 00:00:00 2001 From: Philip Hazel Date: Sat, 21 Sep 2024 15:01:44 +0100 Subject: [PATCH] Improve error messages for NO_BS0 and PYTHON_OCTAL errors --- src/pcre2.h.in | 3 ++- src/pcre2_compile.c | 11 +++++++---- src/pcre2_compile.h | 2 +- src/pcre2_error.c | 3 ++- src/pcre2posix.c | 9 ++++++--- testdata/testoutput10 | 4 ++-- testdata/testoutput2 | 2 +- testdata/testoutput5 | 2 +- 8 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 4775b6b81..235eb47fb 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -321,10 +321,11 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196 #define PCRE2_ERROR_TOO_MANY_CAPTURES 197 -#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198 +#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198 #define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199 #define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200 #define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201 +#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202 /* "Expected" matching error codes: no match and partial match. */ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 7c419761b..a3367da05 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1890,16 +1890,19 @@ else c -= CHAR_0; while(i++ < 2 && ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) c = c * 8 + *ptr++ - CHAR_0; + if (c > 0xff) + { + if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0) *errorcodeptr = ERR102; #if PCRE2_CODE_UNIT_WIDTH == 8 - if (!utf && c > 0xff) *errorcodeptr = ERR51; + else if (!utf) *errorcodeptr = ERR51; #endif - if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0 && c > 0xff) - *errorcodeptr = ERR51; + } /* PCRE2_EXTRA_NO_BS0 disables the NUL escape '\0' but doesn't affect two- or three-character octal escapes \00 and \000, nor \x00. */ + if ((xoptions & PCRE2_EXTRA_NO_BS0) != 0 && c == 0 && i == 1) - *errorcodeptr = ERR3; + *errorcodeptr = ERR98; break; /* \o is a relatively new Perl feature, supporting a more general way of diff --git a/src/pcre2_compile.h b/src/pcre2_compile.h index 3fff760bc..76f6df260 100644 --- a/src/pcre2_compile.h +++ b/src/pcre2_compile.h @@ -61,7 +61,7 @@ enum { ERR0 = COMPILE_ERROR_BASE, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100, - ERR101 }; + ERR101,ERR102 }; /* Code values for parsed patterns, which are stored in a vector of 32-bit unsigned ints. Values less than META_END are literal data values. The coding diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 354d6a603..41160d6d4 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -185,11 +185,12 @@ static const unsigned char compile_error_texts[] = "(*alpha_assertion) not recognized\0" "script runs require Unicode support, which this version of PCRE2 does not have\0" "too many capturing groups (maximum 65535)\0" - "UNUSED ERROR\0" + "octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0" "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" /* 100 */ "branch too long in variable-length lookbehind assertion\0" "compiled pattern would be longer than the limit set by the application\0" + "octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0" ; /* Match-time and UTF error texts are in the same format. */ diff --git a/src/pcre2posix.c b/src/pcre2posix.c index 9fe3199d8..75d8cc7b0 100644 --- a/src/pcre2posix.c +++ b/src/pcre2posix.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2022 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -98,7 +98,8 @@ changed. This #define is a copy of the one in pcre2_internal.h. */ /* Table to translate PCRE2 compile time error codes into POSIX error codes. Only a few PCRE2 errors with a value greater than 23 turn into special POSIX codes: most go to REG_BADPAT. The second table lists, in pairs, those that -don't. */ +don't, even though some of them cannot currently be provoked from within the +POSIX wrapper. */ static const int eint1[] = { 0, /* No error */ @@ -137,7 +138,9 @@ static const int eint2[] = { 37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */ 56, REG_INVARG, /* internal error: unknown newline setting */ 92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */ - 99, REG_EESCAPE /* \K in lookaround */ + 98, REG_EESCAPE, /* missing digit after \0 in NO_BS0 mode */ + 99, REG_EESCAPE, /* \K in lookaround */ + 102, REG_EESCAPE /* \ddd octal > \377 in PYTHON_OCTAL mode */ }; /* Table of texts corresponding to POSIX error codes */ diff --git a/testdata/testoutput10 b/testdata/testoutput10 index ddc8e3e37..88051a3e5 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1965,7 +1965,7 @@ Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string /\400/python_octal -Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode +Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL) /abc/substitute_extended,python_octal abc\=replace=\400 @@ -1978,7 +1978,7 @@ Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement 1: \x{100} /\400/utf,python_octal -Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode +Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL) /abc/utf,substitute_extended,python_octal abc\=replace=\400 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 288530dba..bf7b7620e 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -19477,7 +19477,7 @@ Failed: error 161 at offset 12: subpattern number is too big 0: a\x00b\x00c\x00d /a\0b/no_bs0 -Failed: error 103 at offset 3: unrecognized character follows \ +Failed: error 198 at offset 3: octal digit missing after \0 (PCRE2_EXTRA_NO_BS0 is set) /b\x00c\00d/no_bs0 b\x{00}c\x{00}d diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 55bec91cb..a8c28966b 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -5530,7 +5530,7 @@ Failed: error 147 at offset 10: unknown property after \P or \p 0: \x{100} /\400/utf,python_octal -Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode +Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL) /abc/utf,substitute_extended abc\=replace=\400