Fix backtracking bug for \C\X* in UTF mode.
This commit is contained in:
parent
7105d249f6
commit
aa8ee3ded5
|
@ -73,6 +73,12 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer.
|
||||||
18. There was a similar problem to 17 in pcre2test for global matches, though
|
18. There was a similar problem to 17 in pcre2test for global matches, though
|
||||||
the code there did catch the loop.
|
the code there did catch the loop.
|
||||||
|
|
||||||
|
19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
|
||||||
|
and a subsequent item in the pattern caused a non-match, backtracking over the
|
||||||
|
repeated \X did not stop, but carried on past the start of the subject, causing
|
||||||
|
reference to random memory and/or a segfault. This bug was discovered by the
|
||||||
|
LLVM fuzzer.
|
||||||
|
|
||||||
|
|
||||||
Version 10.10 06-March-2015
|
Version 10.10 06-March-2015
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
|
@ -1333,14 +1333,14 @@ for (;;)
|
||||||
if (*ecode == OP_CALLOUT)
|
if (*ecode == OP_CALLOUT)
|
||||||
{
|
{
|
||||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||||
cb.callout_string_offset = 0;
|
cb.callout_string_offset = 0;
|
||||||
cb.callout_string = NULL;
|
cb.callout_string = NULL;
|
||||||
cb.callout_string_length = 0;
|
cb.callout_string_length = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cb.callout_number = 0;
|
cb.callout_number = 0;
|
||||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||||
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
||||||
cb.callout_string_length =
|
cb.callout_string_length =
|
||||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||||
|
@ -1408,7 +1408,7 @@ for (;;)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_FALSE:
|
case OP_FALSE:
|
||||||
case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */
|
case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_TRUE:
|
case OP_TRUE:
|
||||||
|
@ -1760,14 +1760,14 @@ for (;;)
|
||||||
if (*ecode == OP_CALLOUT)
|
if (*ecode == OP_CALLOUT)
|
||||||
{
|
{
|
||||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||||
cb.callout_string_offset = 0;
|
cb.callout_string_offset = 0;
|
||||||
cb.callout_string = NULL;
|
cb.callout_string = NULL;
|
||||||
cb.callout_string_length = 0;
|
cb.callout_string_length = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cb.callout_number = 0;
|
cb.callout_number = 0;
|
||||||
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
|
||||||
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
|
||||||
cb.callout_string_length =
|
cb.callout_string_length =
|
||||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
callout_length - (1 + 4*LINK_SIZE) - 2;
|
||||||
|
@ -5723,12 +5723,17 @@ for (;;)
|
||||||
|
|
||||||
if (possessive) continue; /* No backtracking */
|
if (possessive) continue; /* No backtracking */
|
||||||
|
|
||||||
|
/* We use <= pp rather than == pp to detect the start of the run while
|
||||||
|
backtracking because the use of \C in UTF mode can cause BACKCHAR to
|
||||||
|
move back past pp. This is just palliative; the use of \C in UTF mode
|
||||||
|
is fraught with danger. */
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
int lgb, rgb;
|
int lgb, rgb;
|
||||||
PCRE2_SPTR fptr;
|
PCRE2_SPTR fptr;
|
||||||
|
|
||||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM45);
|
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM45);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
|
|
||||||
|
@ -5746,7 +5751,7 @@ for (;;)
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
|
||||||
fptr = eptr - 1;
|
fptr = eptr - 1;
|
||||||
if (!utf) c = *fptr; else
|
if (!utf) c = *fptr; else
|
||||||
{
|
{
|
||||||
|
|
|
@ -2221,4 +2221,9 @@
|
||||||
|
|
||||||
"[\S\V\H]"utf
|
"[\S\V\H]"utf
|
||||||
|
|
||||||
|
/\C\X*TӅ;
|
||||||
|
{0,6}\v+
F
|
||||||
|
/utf
|
||||||
|
Ӆ\x0a
|
||||||
|
|
||||||
# End of testinput4
|
# End of testinput4
|
||||||
|
|
|
@ -3741,4 +3741,10 @@ No match
|
||||||
|
|
||||||
"[\S\V\H]"utf
|
"[\S\V\H]"utf
|
||||||
|
|
||||||
|
/\C\X*TӅ;
|
||||||
|
{0,6}\v+
F
|
||||||
|
/utf
|
||||||
|
Ӆ\x0a
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput4
|
# End of testinput4
|
||||||
|
|
Loading…
Reference in New Issue