Fix the previous PCRE2_FIRSTLINE patch, which was broken.
This commit is contained in:
parent
7a6e8a4454
commit
807f37095d
|
@ -3363,8 +3363,6 @@ for (;;)
|
||||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||||
(options & PCRE2_DFA_RESTART) == 0)
|
(options & PCRE2_DFA_RESTART) == 0)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR save_end_subject = end_subject;
|
|
||||||
|
|
||||||
/* If firstline is TRUE, the start of the match is constrained to the first
|
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||||
line of a multiline string. That is, the match must be before or at the
|
line of a multiline string. That is, the match must be before or at the
|
||||||
first newline following the start of matching. Temporarily adjust
|
first newline following the start of matching. Temporarily adjust
|
||||||
|
@ -3388,13 +3386,6 @@ for (;;)
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
||||||
|
|
||||||
/* Note that we only need to advance by one code unit if we found a
|
|
||||||
newline. If the newline is CRLF, a first code unit of LF should not
|
|
||||||
match, because it is not at or before the newline. Similarly, only the
|
|
||||||
first code unit of a Unicode newline might be relevant. */
|
|
||||||
|
|
||||||
if (t < end_subject) t++;
|
|
||||||
end_subject = t;
|
end_subject = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3466,14 +3457,18 @@ for (;;)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we can't find the required code unit, break the bumpalong loop,
|
/* If we can't find the required code unit, having reached the true end
|
||||||
to force a match failure, except when doing partial matching, when we
|
of the subject, break the bumpalong loop, to force a match failure,
|
||||||
let the next cycle run at the end of the subject. To see why, consider
|
except when doing partial matching, when we let the next cycle run at
|
||||||
the pattern /(?<=abc)def/, which partially matches "abc", even though
|
the end of the subject. To see why, consider the pattern /(?<=abc)def/,
|
||||||
the string does not contain the starting character "d". */
|
which partially matches "abc", even though the string does not contain
|
||||||
|
the starting character "d". If we have not reached the true end of the
|
||||||
|
subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
|
||||||
|
we also let the cycle run, because the matching string is legitimately
|
||||||
|
allowed to start with the first code unit of a newline. */
|
||||||
|
|
||||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||||
start_match >= end_subject)
|
start_match >= mb->end_subject)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3532,7 +3527,7 @@ for (;;)
|
||||||
|
|
||||||
/* Restore fudged end_subject */
|
/* Restore fudged end_subject */
|
||||||
|
|
||||||
end_subject = save_end_subject;
|
end_subject = mb->end_subject;
|
||||||
|
|
||||||
/* The following two optimizations are disabled for partial matching. */
|
/* The following two optimizations are disabled for partial matching. */
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2015-2017 University of Cambridge
|
New API code Copyright (c) 2015-2018 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -6363,15 +6363,11 @@ for(;;)
|
||||||
|
|
||||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR save_end_subject = end_subject;
|
|
||||||
|
|
||||||
/* If firstline is TRUE, the start of the match is constrained to the first
|
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||||
line of a multiline string. That is, the match must be before or at the
|
line of a multiline string. That is, the match must be before or at the
|
||||||
first newline following the start of matching. Temporarily adjust
|
first newline following the start of matching. Temporarily adjust
|
||||||
end_subject so that we stop the optimization scans for a first code unit
|
end_subject so that we stop the scans for a first code unit at a newline.
|
||||||
immediately after the first character of a newline (the first code unit can
|
If the match fails at the newline, later code breaks the loop. */
|
||||||
legitimately be a newline). If the match fails at the newline, later code
|
|
||||||
breaks this loop. */
|
|
||||||
|
|
||||||
if (firstline)
|
if (firstline)
|
||||||
{
|
{
|
||||||
|
@ -6388,13 +6384,6 @@ for(;;)
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
while (t < end_subject && !IS_NEWLINE(t)) t++;
|
||||||
|
|
||||||
/* Note that we only need to advance by one code unit if we found a
|
|
||||||
newline. If the newline is CRLF, a first code unit of LF should not
|
|
||||||
match, because it is not at or before the newline. Similarly, only the
|
|
||||||
first code unit of a Unicode newline might be relevant. */
|
|
||||||
|
|
||||||
if (t < end_subject) t++;
|
|
||||||
end_subject = t;
|
end_subject = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6470,13 +6459,17 @@ for(;;)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we can't find the required code unit, break the bumpalong loop,
|
/* If we can't find the required code unit, having reached the true end
|
||||||
to force a match failure, except when doing partial matching, when we
|
of the subject, break the bumpalong loop, to force a match failure,
|
||||||
let the next cycle run at the end of the subject. To see why, consider
|
except when doing partial matching, when we let the next cycle run at
|
||||||
the pattern /(?<=abc)def/, which partially matches "abc", even though
|
the end of the subject. To see why, consider the pattern /(?<=abc)def/,
|
||||||
the string does not contain the starting character "d". */
|
which partially matches "abc", even though the string does not contain
|
||||||
|
the starting character "d". If we have not reached the true end of the
|
||||||
|
subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
|
||||||
|
we also let the cycle run, because the matching string is legitimately
|
||||||
|
allowed to start with the first code unit of a newline. */
|
||||||
|
|
||||||
if (!mb->partial && start_match >= end_subject)
|
if (!mb->partial && start_match >= mb->end_subject)
|
||||||
{
|
{
|
||||||
rc = MATCH_NOMATCH;
|
rc = MATCH_NOMATCH;
|
||||||
break;
|
break;
|
||||||
|
@ -6538,7 +6531,7 @@ for(;;)
|
||||||
|
|
||||||
/* Restore fudged end_subject */
|
/* Restore fudged end_subject */
|
||||||
|
|
||||||
end_subject = save_end_subject;
|
end_subject = mb->end_subject;
|
||||||
|
|
||||||
/* The following two optimizations must be disabled for partial matching. */
|
/* The following two optimizations must be disabled for partial matching. */
|
||||||
|
|
||||||
|
|
|
@ -5405,4 +5405,8 @@ a)"xI
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
xyz\r\nabc
|
xyz\r\nabc
|
||||||
|
|
||||||
|
/[abc]/firstline
|
||||||
|
\= Expect no match
|
||||||
|
\na
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -4942,4 +4942,8 @@
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
xyz\r\nabc
|
xyz\r\nabc
|
||||||
|
|
||||||
|
/[abc]/firstline
|
||||||
|
\= Expect no match
|
||||||
|
\na
|
||||||
|
|
||||||
# End of testinput6
|
# End of testinput6
|
||||||
|
|
|
@ -16453,6 +16453,11 @@ No match
|
||||||
xyz\r\nabc
|
xyz\r\nabc
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/[abc]/firstline
|
||||||
|
\= Expect no match
|
||||||
|
\na
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
|
@ -7766,4 +7766,9 @@ Failed: error -47: match limit exceeded
|
||||||
xyz\r\nabc
|
xyz\r\nabc
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/[abc]/firstline
|
||||||
|
\= Expect no match
|
||||||
|
\na
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput6
|
# End of testinput6
|
||||||
|
|
Loading…
Reference in New Issue