Fix \C bug with repeated character classes in UTF-8 mode.
This commit is contained in:
parent
553bf8a1dc
commit
b26aa366ba
|
@ -20,6 +20,11 @@ Unicode newlines" in the default case when --enable-bsr-anycrlf has not been
|
|||
specified. Similarly, running "pcre2test -C bsr" never produced the result
|
||||
ANY.
|
||||
|
||||
4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing
|
||||
multi-code-unit characters caused bad behaviour and possibly a crash. This
|
||||
issue was fixed for other kinds of repeat in release 10.20 by change 19, but
|
||||
repeating character classes were overlooked.
|
||||
|
||||
|
||||
Version 10.31 12-February-2018
|
||||
------------------------------
|
||||
|
|
|
@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||
|
||||
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||
go too far. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
RMATCH(Fecode, RM201);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
||||
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||
BACKCHAR(Feptr);
|
||||
}
|
||||
}
|
||||
|
@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||
|
||||
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||
go too far. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
RMATCH(Fecode, RM101);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
||||
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(Feptr);
|
||||
#endif
|
||||
|
@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||
|
||||
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||
Unicode character. Use <= pp to ensure backtracking doesn't go too far.
|
||||
*/
|
||||
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||
go too far. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
|
|
|
@ -98,4 +98,7 @@
|
|||
\= Expect no match - tests \C at end of subject
|
||||
ab
|
||||
|
||||
/\C[^\v]+\x80/utf
|
||||
[AΏBŀC]
|
||||
|
||||
# End of testinput22
|
||||
|
|
|
@ -171,4 +171,8 @@ No match
|
|||
ab
|
||||
No match
|
||||
|
||||
/\C[^\v]+\x80/utf
|
||||
[AΏBŀC]
|
||||
No match
|
||||
|
||||
# End of testinput22
|
||||
|
|
|
@ -169,4 +169,8 @@ No match
|
|||
ab
|
||||
No match
|
||||
|
||||
/\C[^\v]+\x80/utf
|
||||
[AΏBŀC]
|
||||
No match
|
||||
|
||||
# End of testinput22
|
||||
|
|
|
@ -173,4 +173,8 @@ No match
|
|||
ab
|
||||
No match
|
||||
|
||||
/\C[^\v]+\x80/utf
|
||||
[AΏBŀC]
|
||||
No match
|
||||
|
||||
# End of testinput22
|
||||
|
|
Loading…
Reference in New Issue