Fix backtracking bug for \C\X* in UTF mode.
This commit is contained in:
parent
7105d249f6
commit
aa8ee3ded5
|
@ -73,6 +73,12 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer.
|
|||
18. There was a similar problem to 17 in pcre2test for global matches, though
|
||||
the code there did catch the loop.
|
||||
|
||||
19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
|
||||
and a subsequent item in the pattern caused a non-match, backtracking over the
|
||||
repeated \X did not stop at the start of the run, but carried on past the start
of the subject, causing
|
||||
a reference to arbitrary memory and/or a segfault. This bug was discovered by the
|
||||
LLVM fuzzer.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
|
|
@ -1333,14 +1333,14 @@ for (;;)
|
|||
if (*ecode == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||
|
@ -1408,7 +1408,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
case OP_FALSE:
|
||||
case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */
|
||||
case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
|
@ -1760,14 +1760,14 @@ for (;;)
|
|||
if (*ecode == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||
|
@ -5723,12 +5723,17 @@ for (;;)
|
|||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
|
||||
/* We use <= pp rather than == pp to detect the start of the run while
|
||||
backtracking because the use of \C in UTF mode can cause BACKCHAR to
|
||||
move back past pp. This is just palliative; the use of \C in UTF mode
|
||||
is fraught with danger. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
int lgb, rgb;
|
||||
PCRE2_SPTR fptr;
|
||||
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM45);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
|
||||
|
@ -5746,7 +5751,7 @@ for (;;)
|
|||
|
||||
for (;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
fptr = eptr - 1;
|
||||
if (!utf) c = *fptr; else
|
||||
{
|
||||
|
|
|
@ -2221,4 +2221,9 @@
|
|||
|
||||
"[\S\V\H]"utf
|
||||
|
||||
/\C\X*TӅ;
|
||||
{0,6}\v+
F
|
||||
/utf
|
||||
Ӆ\x0a
|
||||
|
||||
# End of testinput4
|
||||
|
|
|
@ -3741,4 +3741,10 @@ No match
|
|||
|
||||
"[\S\V\H]"utf
|
||||
|
||||
/\C\X*TӅ;
|
||||
{0,6}\v+
F
|
||||
/utf
|
||||
Ӆ\x0a
|
||||
No match
|
||||
|
||||
# End of testinput4
|
||||
|
|
Loading…
Reference in New Issue