Skip to content

Commit

Permalink
Improve error messages for NO_BS0 and PYTHON_OCTAL errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
PhilipHazel committed Sep 21, 2024
1 parent 1d73e19 commit cd4c0e3
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 14 deletions.
3 changes: 2 additions & 1 deletion src/pcre2.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -321,10 +321,11 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198
#define PCRE2_ERROR_MISSING_OCTAL_DIGIT 198
#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND 199
#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED 200
#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG 201
#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL 202


/* "Expected" matching error codes: no match and partial match. */
Expand Down
11 changes: 7 additions & 4 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1890,16 +1890,19 @@ else
c -= CHAR_0;
while(i++ < 2 && ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7)
c = c * 8 + *ptr++ - CHAR_0;
if (c > 0xff)
{
if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0) *errorcodeptr = ERR102;
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!utf && c > 0xff) *errorcodeptr = ERR51;
else if (!utf) *errorcodeptr = ERR51;
#endif
if ((xoptions & PCRE2_EXTRA_PYTHON_OCTAL) != 0 && c > 0xff)
*errorcodeptr = ERR51;
}

/* PCRE2_EXTRA_NO_BS0 disables the NUL escape '\0' but doesn't affect
two- or three-character octal escapes \00 and \000, nor \x00. */

if ((xoptions & PCRE2_EXTRA_NO_BS0) != 0 && c == 0 && i == 1)
*errorcodeptr = ERR3;
*errorcodeptr = ERR98;
break;

/* \o is a relatively new Perl feature, supporting a more general way of
Expand Down
2 changes: 1 addition & 1 deletion src/pcre2_compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
ERR101 };
ERR101,ERR102 };

/* Code values for parsed patterns, which are stored in a vector of 32-bit
unsigned ints. Values less than META_END are literal data values. The coding
Expand Down
3 changes: 2 additions & 1 deletion src/pcre2_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,12 @@ static const unsigned char compile_error_texts[] =
"(*alpha_assertion) not recognized\0"
"script runs require Unicode support, which this version of PCRE2 does not have\0"
"too many capturing groups (maximum 65535)\0"
"UNUSED ERROR\0"
"octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0"
"\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
/* 100 */
"branch too long in variable-length lookbehind assertion\0"
"compiled pattern would be longer than the limit set by the application\0"
"octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0"
;

/* Match-time and UTF error texts are in the same format. */
Expand Down
9 changes: 6 additions & 3 deletions src/pcre2posix.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down
Expand Up @@ -98,7 +98,8 @@ changed. This #define is a copy of the one in pcre2_internal.h. */
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
don't. */
don't, even though some of them cannot currently be provoked from within the
POSIX wrapper. */

static const int eint1[] = {
0, /* No error */
Expand Down Expand Up @@ -137,7 +138,9 @@ static const int eint2[] = {
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
56, REG_INVARG, /* internal error: unknown newline setting */
92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */
99, REG_EESCAPE /* \K in lookaround */
98, REG_EESCAPE, /* missing digit after \0 in NO_BS0 mode */
99, REG_EESCAPE, /* \K in lookaround */
102, REG_EESCAPE /* \ddd octal > \377 in PYTHON_OCTAL mode */
};

/* Table of texts corresponding to POSIX error codes */
Expand Down
4 changes: 2 additions & 2 deletions testdata/testoutput10
Original file line number Diff line number Diff line change
Expand Up @@ -1965,7 +1965,7 @@ Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF
Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string

/\400/python_octal
Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode
Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)

/abc/substitute_extended,python_octal
abc\=replace=\400
Expand All @@ -1978,7 +1978,7 @@ Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement
1: \x{100}

/\400/utf,python_octal
Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode
Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)

/abc/utf,substitute_extended,python_octal
abc\=replace=\400
Expand Down
2 changes: 1 addition & 1 deletion testdata/testoutput2
Original file line number Diff line number Diff line change
Expand Up @@ -19477,7 +19477,7 @@ Failed: error 161 at offset 12: subpattern number is too big
0: a\x00b\x00c\x00d

/a\0b/no_bs0
Failed: error 103 at offset 3: unrecognized character follows \
Failed: error 198 at offset 3: octal digit missing after \0 (PCRE2_EXTRA_NO_BS0 is set)

/b\x00c\00d/no_bs0
b\x{00}c\x{00}d
Expand Down
2 changes: 1 addition & 1 deletion testdata/testoutput5
Original file line number Diff line number Diff line change
Expand Up @@ -5530,7 +5530,7 @@ Failed: error 147 at offset 10: unknown property after \P or \p
0: \x{100}

/\400/utf,python_octal
Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF-8 mode
Failed: error 202 at offset 4: octal value given by \ddd is greater than \377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)

/abc/utf,substitute_extended
abc\=replace=\400
Expand Down

0 comments on commit cd4c0e3

Please sign in to comment.