Fix \C bug with repeated character classes in UTF-8 mode.
This commit is contained in:
parent
553bf8a1dc
commit
b26aa366ba
|
@ -20,6 +20,11 @@ Unicode newlines" in the default case when --enable-bsr-anycrlf has not been
|
||||||
specified. Similarly, running "pcre2test -C bsr" never produced the result
|
specified. Similarly, running "pcre2test -C bsr" never produced the result
|
||||||
ANY.
|
ANY.
|
||||||
|
|
||||||
|
4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing
|
||||||
|
multi-code-unit characters caused bad behaviour and possibly a crash. This
|
||||||
|
issue was fixed for other kinds of repeat in release 10.20 by change 19, but
|
||||||
|
repeating character classes were overlooked.
|
||||||
|
|
||||||
|
|
||||||
Version 10.31 12-February-2018
|
Version 10.31 12-February-2018
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
|
@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
|
|
||||||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||||
|
|
||||||
|
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||||
|
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||||
|
go too far. */
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
RMATCH(Fecode, RM201);
|
RMATCH(Fecode, RM201);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||||
BACKCHAR(Feptr);
|
BACKCHAR(Feptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
|
|
||||||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||||
|
|
||||||
|
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||||
|
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||||
|
go too far. */
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
RMATCH(Fecode, RM101);
|
RMATCH(Fecode, RM101);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) BACKCHAR(Feptr);
|
if (utf) BACKCHAR(Feptr);
|
||||||
#endif
|
#endif
|
||||||
|
@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||||
|
|
||||||
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||||
Unicode character. Use <= pp to ensure backtracking doesn't go too far.
|
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||||
*/
|
go too far. */
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
|
|
|
@ -98,4 +98,7 @@
|
||||||
\= Expect no match - tests \C at end of subject
|
\= Expect no match - tests \C at end of subject
|
||||||
ab
|
ab
|
||||||
|
|
||||||
|
/\C[^\v]+\x80/utf
|
||||||
|
[AΏBŀC]
|
||||||
|
|
||||||
# End of testinput22
|
# End of testinput22
|
||||||
|
|
|
@ -171,4 +171,8 @@ No match
|
||||||
ab
|
ab
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/\C[^\v]+\x80/utf
|
||||||
|
[AΏBŀC]
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput22
|
# End of testinput22
|
||||||
|
|
|
@ -169,4 +169,8 @@ No match
|
||||||
ab
|
ab
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/\C[^\v]+\x80/utf
|
||||||
|
[AΏBŀC]
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput22
|
# End of testinput22
|
||||||
|
|
|
@ -173,4 +173,8 @@ No match
|
||||||
ab
|
ab
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/\C[^\v]+\x80/utf
|
||||||
|
[AΏBŀC]
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput22
|
# End of testinput22
|
||||||
|
|
Loading…
Reference in New Issue