Fix PCRE2_FIRSTLINE bug when a pattern match starts with the first code unit of
a newline sequence.
This commit is contained in:
parent
800d884bce
commit
7a6e8a4454
12
ChangeLog
12
ChangeLog
|
@ -100,11 +100,17 @@ code for -g that handles the case when \K in an assertion causes the match to
|
|||
end at the original start point. Also arranged for it to detect when \K causes
|
||||
the end of a match to be before its start.
|
||||
|
||||
24. Similar to 23 above, strange things (including loops) could happen in
|
||||
pcre2grep when \K was used in an assertion when --colour was used or in
|
||||
multiline mode. The "end at original start point" bug is fixed, and if the end
|
||||
24. Similar to 23 above, strange things (including loops) could happen in
|
||||
pcre2grep when \K was used in an assertion when --colour was used or in
|
||||
multiline mode. The "end at original start point" bug is fixed, and if the end
|
||||
point is found to be before the start point, they are swapped.
|
||||
|
||||
25. When PCRE2_FIRSTLINE without PCRE2_NO_START_OPTIMIZE was used in non-JIT
|
||||
matching (both pcre2_match() and pcre2_dfa_match()) and the matched string
|
||||
started with the first code unit of a newline sequence, matching failed because
|
||||
the search for the first code unit stopped before rather than after the first
|
||||
code unit of a newline in the subject string.
|
||||
|
||||
|
||||
Version 10.30 14-August-2017
|
||||
----------------------------
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -3367,9 +3367,11 @@ for (;;)
|
|||
|
||||
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||
line of a multiline string. That is, the match must be before or at the
|
||||
first newline. Implement this by temporarily adjusting end_subject so that
|
||||
we stop the optimization scans for a first code unit at a newline. If the
|
||||
match fails at the newline, later code breaks this loop. */
|
||||
first newline following the start of matching. Temporarily adjust
|
||||
end_subject so that we stop the optimization scans for a first code unit
|
||||
immediately after the first code unit of a newline (the first code unit can
|
||||
legitimately be a newline). If the match fails at the newline, later code
|
||||
breaks this loop. */
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
|
@ -3377,7 +3379,7 @@ for (;;)
|
|||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (t < mb->end_subject && !IS_NEWLINE(t))
|
||||
while (t < end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
ACROSSCHAR(t < end_subject, *t, t++);
|
||||
|
@ -3385,7 +3387,14 @@ for (;;)
|
|||
}
|
||||
else
|
||||
#endif
|
||||
while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
|
||||
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
||||
|
||||
/* Note that we only need to advance by one code unit if we found a
|
||||
newline. If the newline is CRLF, a first code unit of LF should not
|
||||
match, because it is not at or before the newline. Similarly, only the
|
||||
first code unit of a Unicode newline might be relevant. */
|
||||
|
||||
if (t < end_subject) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
|
|
|
@ -6367,9 +6367,11 @@ for(;;)
|
|||
|
||||
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||
line of a multiline string. That is, the match must be before or at the
|
||||
first newline. Implement this by temporarily adjusting end_subject so that
|
||||
we stop the optimization scans for a first code unit at a newline. If the
|
||||
match fails at the newline, later code breaks this loop. */
|
||||
first newline following the start of matching. Temporarily adjust
|
||||
end_subject so that we stop the optimization scans for a first code unit
|
||||
immediately after the first code unit of a newline (the first code unit can
|
||||
legitimately be a newline). If the match fails at the newline, later code
|
||||
breaks this loop. */
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
|
@ -6377,7 +6379,7 @@ for(;;)
|
|||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (t < mb->end_subject && !IS_NEWLINE(t))
|
||||
while (t < end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
ACROSSCHAR(t < end_subject, *t, t++);
|
||||
|
@ -6385,7 +6387,14 @@ for(;;)
|
|||
}
|
||||
else
|
||||
#endif
|
||||
while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
|
||||
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
||||
|
||||
/* Note that we only need to advance by one code unit if we found a
|
||||
newline. If the newline is CRLF, a first code unit of LF should not
|
||||
match, because it is not at or before the newline. Similarly, only the
|
||||
first code unit of a Unicode newline might be relevant. */
|
||||
|
||||
if (t < end_subject) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
|
@ -6648,7 +6657,7 @@ for(;;)
|
|||
|
||||
cb.start_match = (PCRE2_SIZE)(start_match - subject);
|
||||
cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
|
||||
|
||||
|
||||
mb->start_used_ptr = start_match;
|
||||
mb->last_used_ptr = start_match;
|
||||
mb->match_call_count = 0;
|
||||
|
|
|
@ -5395,4 +5395,14 @@ a)"xI
|
|||
\= Expect no match
|
||||
aac\=callout_extra
|
||||
|
||||
/\n/firstline
|
||||
xyz\nabc
|
||||
|
||||
/\nabc/firstline
|
||||
xyz\nabc
|
||||
|
||||
/\x{0a}abc/firstline,newline=crlf
|
||||
\= Expect no match
|
||||
xyz\r\nabc
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -4932,4 +4932,14 @@
|
|||
/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor
|
||||
.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););
|
||||
|
||||
/\n/firstline
|
||||
xyz\nabc
|
||||
|
||||
/\nabc/firstline
|
||||
xyz\nabc
|
||||
|
||||
/\x{0a}abc/firstline,newline=crlf
|
||||
\= Expect no match
|
||||
xyz\r\nabc
|
||||
|
||||
# End of testinput6
|
||||
|
|
|
@ -16440,6 +16440,19 @@ Callout (15): 'XXX'
|
|||
^^ b
|
||||
No match
|
||||
|
||||
/\n/firstline
|
||||
xyz\nabc
|
||||
0: \x0a
|
||||
|
||||
/\nabc/firstline
|
||||
xyz\nabc
|
||||
0: \x0aabc
|
||||
|
||||
/\x{0a}abc/firstline,newline=crlf
|
||||
\= Expect no match
|
||||
xyz\r\nabc
|
||||
No match
|
||||
|
||||
# End of testinput2
|
||||
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
|
@ -7753,4 +7753,17 @@ No match
|
|||
.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););
|
||||
Failed: error -47: match limit exceeded
|
||||
|
||||
/\n/firstline
|
||||
xyz\nabc
|
||||
0: \x0a
|
||||
|
||||
/\nabc/firstline
|
||||
xyz\nabc
|
||||
0: \x0aabc
|
||||
|
||||
/\x{0a}abc/firstline,newline=crlf
|
||||
\= Expect no match
|
||||
xyz\r\nabc
|
||||
No match
|
||||
|
||||
# End of testinput6
|
||||
|
|
Loading…
Reference in New Issue