Fix bug when \K is used in a lookbehind in a substitute pattern.
This commit is contained in:
parent
9de1a271a0
commit
89c2a02027
|
@ -2569,7 +2569,7 @@ calls to <b>pcre2_match()</b> if you are making repeated calls to find other
|
|||
matches in the same subject string.
|
||||
</P>
|
||||
<P>
|
||||
WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
|
||||
<b>Warning:</b> When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
|
||||
string as a subject, or an invalid value of <i>startoffset</i>, is undefined.
|
||||
Your program may crash or loop indefinitely.
|
||||
<pre>
|
||||
|
@ -2776,6 +2776,15 @@ branch of the group, but it is not on the matching path. On the other hand,
|
|||
when this pattern fails to match "bx", the returned name is B.
|
||||
</P>
|
||||
<P>
|
||||
<b>Warning:</b> By default, certain start-of-match optimizations are used to
|
||||
give a fast "no match" result in some situations. For example, if the anchoring
|
||||
is removed from the pattern above, there is an initial check for the presence
|
||||
of "c" in the subject before running the matching engine. This check fails for
|
||||
"bx", causing a match failure without seeing any marks. You can disable the
|
||||
start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for
|
||||
<b>pcre2_compile()</b> or starting the pattern with (*NO_START_OPT).
|
||||
</P>
|
||||
<P>
|
||||
After a successful match, a partial match, or one of the invalid UTF errors
|
||||
(for example, PCRE2_ERROR_UTF8_ERR5), <b>pcre2_get_startchar()</b> can be
|
||||
called. After a successful or partial match it returns the code unit offset of
|
||||
|
@ -3330,7 +3339,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
|
|||
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
|
||||
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
|
||||
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
|
||||
it started, which can happen if \K is used in an assertion).
|
||||
it started, or the match started earlier than the current position in the
|
||||
subject, which can happen if \K is used in an assertion).
|
||||
</P>
|
||||
<P>
|
||||
As for all PCRE2 errors, a text message that describes the error can be
|
||||
|
@ -3604,7 +3614,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 27 April 2018
|
||||
Last updated: 22 June 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -2519,7 +2519,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
|||
second and subsequent calls to pcre2_match() if you are making repeated
|
||||
calls to find other matches in the same subject string.
|
||||
|
||||
WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
|
||||
Warning: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
|
||||
invalid string as a subject, or an invalid value of startoffset, is
|
||||
undefined. Your program may crash or loop indefinitely.
|
||||
|
||||
|
@ -2704,6 +2704,15 @@ OTHER INFORMATION ABOUT A MATCH
|
|||
the other hand, when this pattern fails to match "bx", the returned
|
||||
name is B.
|
||||
|
||||
Warning: By default, certain start-of-match optimizations are used to
|
||||
give a fast "no match" result in some situations. For example, if the
|
||||
anchoring is removed from the pattern above, there is an initial check
|
||||
for the presence of "c" in the subject before running the matching
|
||||
engine. This check fails for "bx", causing a match failure without see-
|
||||
ing any marks. You can disable the start-of-match optimizations by set-
|
||||
ting the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or starting
|
||||
the pattern with (*NO_START_OPT).
|
||||
|
||||
After a successful match, a partial match, or one of the invalid UTF
|
||||
errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can
|
||||
be called. After a successful or partial match it returns the code unit
|
||||
|
@ -3230,7 +3239,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
|||
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REP-
|
||||
MISSINGBRACE (closing curly bracket not found), PCRE2_ERROR_BADSUBSTI-
|
||||
TUTION (syntax error in extended group substitution), and
|
||||
PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started,
|
||||
PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started,
|
||||
or the match started earlier than the current position in the subject,
|
||||
which can happen if \K is used in an assertion).
|
||||
|
||||
As for all PCRE2 errors, a text message that describes the error can be
|
||||
|
@ -3484,7 +3494,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 27 April 2018
|
||||
Last updated: 22 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "18 June 2018" "PCRE2 10.32"
|
||||
.TH PCRE2API 3 "22 June 2018" "PCRE2 10.32"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -3328,7 +3328,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
|
|||
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
|
||||
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
|
||||
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
|
||||
it started, which can happen if \eK is used in an assertion).
|
||||
it started, or the match started earlier than the current position in the
|
||||
subject, which can happen if \eK is used in an assertion).
|
||||
.P
|
||||
As for all PCRE2 errors, a text message that describes the error can be
|
||||
obtained by calling the \fBpcre2_get_error_message()\fP function (see
|
||||
|
@ -3621,6 +3622,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 18 June 2018
|
||||
Last updated: 22 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -255,7 +255,7 @@ static const unsigned char match_error_texts[] =
|
|||
"expected closing curly bracket in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
/* 60 */
|
||||
"match with end before start is not supported\0"
|
||||
"match with end before start or start moved backwards is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
|
|
|
@ -361,9 +361,9 @@ do
|
|||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
are not supported. */
|
||||
or start before the current point in the subject are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0])
|
||||
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
|
|
|
@ -4644,6 +4644,9 @@ B)x/alt_verbnames,mark
|
|||
/(?=a\K)/replace=z
|
||||
BaCaD
|
||||
|
||||
/(?<=\K.)/g,replace=-
|
||||
ab
|
||||
|
||||
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
|
||||
|
||||
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
|
||||
|
|
|
@ -14899,7 +14899,11 @@ Subject length lower bound = 1
|
|||
|
||||
/(?=a\K)/replace=z
|
||||
BaCaD
|
||||
Failed: error -60: match with end before start is not supported
|
||||
Failed: error -60: match with end before start or start moved backwards is not supported
|
||||
|
||||
/(?<=\K.)/g,replace=-
|
||||
ab
|
||||
Failed: error -60: match with end before start or start moved backwards is not supported
|
||||
|
||||
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
|
||||
Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters)
|
||||
|
|
Loading…
Reference in New Issue