Skip to content

Commit

Permalink
Fix \X matching in 32 bit mode without UTF in JIT
Browse files Browse the repository at this point in the history
  • Loading branch information
Zoltan Herczeg committed Nov 22, 2023
1 parent a0c5393 commit 45dcb3d
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 3 deletions.
2 changes: 2 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ undefined behaviour.

44. Implement --group-separator and --no-group-separator for pcre2grep.

45. Fix \X matching in 32 bit mode without UTF in JIT.


Version 10.42 11-December-2022
------------------------------
Expand Down
6 changes: 3 additions & 3 deletions src/pcre2_jit_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -8718,7 +8718,7 @@ c = *cc++;

#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
- return NULL;
+ return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
lgb = UCD_GRAPHBREAK(c);

Expand Down Expand Up @@ -8958,7 +8958,7 @@ switch(type)
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
- if (!common->utf || common->invalid_utf)
+ if (common->invalid_utf)
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

Expand Down Expand Up @@ -12044,7 +12044,7 @@ switch(opcode)
}

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
+ if (type == OP_EXTUNI || common->utf)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
detect_partial_match(common, &no_match);
Expand Down
4 changes: 4 additions & 0 deletions testdata/testinput12
Original file line number Diff line number Diff line change
Expand Up @@ -569,4 +569,8 @@
/\x{802a0000}*/
\x{802a0000}\x{802a0000}

# UTF matching without UTF, check invalid UTF characters
/\X++/
a\x{110000}\x{ffffffff}

# End of testinput12
9 changes: 9 additions & 0 deletions testdata/testoutput12-16
Original file line number Diff line number Diff line change
Expand Up @@ -1814,4 +1814,13 @@ No match
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
\x{802a0000}\x{802a0000}

# UTF matching without UTF, check invalid UTF characters
/\X++/
a\x{110000}\x{ffffffff}
** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
0: a\x00\x{ffff}

# End of testinput12
5 changes: 5 additions & 0 deletions testdata/testoutput12-32
Original file line number Diff line number Diff line change
Expand Up @@ -1812,4 +1812,9 @@ No match
\x{802a0000}\x{802a0000}
0: \x{802a0000}\x{802a0000}

# UTF matching without UTF, check invalid UTF characters
/\X++/
a\x{110000}\x{ffffffff}
0: a\x{110000}\x{ffffffff}

# End of testinput12

0 comments on commit 45dcb3d

Please sign in to comment.