From 8d3e96ceae6e5931444d461b6acea47ee1a8bed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 22 Oct 2023 06:07:32 -0700 Subject: [PATCH] maketables: avoid misleading values in case flipping table (#313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tables generated by pcre2_maketables() include one that maps all lowercase characters in the first 256 code points to their corresponding upper case code point, but it fails to account for toupper() returning a code point larger than 255, which is then truncated when stored and leaves an unrelated code in the table instead. Restrict all values to what is valid for uint8_t and document in the test case the failure for character 'µ'[1] (U+00B5), for which toupper() was incorrectly getting back 924 (U+039C) from macOS fr_FR, resulting in an incorrect case equivalent with the truncated value of 156 (0x9C). [1] https://en.wikipedia.org/wiki/Mu_(letter) --- src/pcre2_maketables.c | 8 +++++--- testdata/testinput3 | 5 +++++ testdata/testoutput3 | 7 +++++++ testdata/testoutput3A | 7 +++++++ testdata/testoutput3B | 7 +++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c index 56d249402..ac8b63b80 100644 --- a/src/pcre2_maketables.c +++ b/src/pcre2_maketables.c @@ -52,8 +52,6 @@ PCRE2_DFTABLES is defined. */ # include "pcre2_internal.h" #endif - - /************************************************* * Create PCRE2 character tables * *************************************************/ @@ -98,7 +96,11 @@ for (i = 0; i < 256; i++) *p++ = tolower(i); /* Next the case-flipping table */ -for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); +for (i = 0; i < 256; i++) + { + int c = islower(i)? toupper(i) : tolower(i); + *p++ = (c < 256)? c : i; + } /* Then the character class tables. Don't try to be clever and save effort on exclusive ones - in some locales things may be different.
diff --git a/testdata/testinput3 b/testdata/testinput3 index 71e95fec4..20f8d4c23 100644 --- a/testdata/testinput3 +++ b/testdata/testinput3 @@ -75,6 +75,11 @@ \= Expect no match école +/\xb5/i + µ +\= Expect no match + \x9c + /\W+/ >>>\xaa<<< >>>\xba<<< diff --git a/testdata/testoutput3 b/testdata/testoutput3 index 801966a98..717fa4569 100644 --- a/testdata/testoutput3 +++ b/testdata/testoutput3 @@ -108,6 +108,13 @@ Subject length lower bound = 1 école No match +/\xb5/i + µ + 0: µ +\= Expect no match + \x9c +No match + /\W+/ >>>\xaa<<< 0: >>> diff --git a/testdata/testoutput3A b/testdata/testoutput3A index d7a223ab1..53f6894fc 100644 --- a/testdata/testoutput3A +++ b/testdata/testoutput3A @@ -108,6 +108,13 @@ Subject length lower bound = 1 école No match +/\xb5/i + µ + 0: µ +\= Expect no match + \x9c +No match + /\W+/ >>>\xaa<<< 0: >>> diff --git a/testdata/testoutput3B b/testdata/testoutput3B index b18d441b4..1e22efe46 100644 --- a/testdata/testoutput3B +++ b/testdata/testoutput3B @@ -108,6 +108,13 @@ Subject length lower bound = 1 école No match +/\xb5/i + µ + 0: µ +\= Expect no match + \x9c +No match + /\W+/ >>>\xaa<<< 0: >>>