Forbid \K patterns that end before they start in pcre2_substitute().
This commit is contained in:
parent
75181cca2e
commit
2caf22dc61
|
@ -257,6 +257,9 @@ as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
|
|||
|
||||
74. Give an error if a lookbehind assertion is longer than 65535 code units.
|
||||
|
||||
75. Give an error in pcre2_substitute() if a match ends before it starts (as a
|
||||
result of the use of \K).
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21"
|
||||
.TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -2666,7 +2666,9 @@ same number causes an error at compile time.
|
|||
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
||||
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
||||
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
||||
which a \eK item in a lookahead in the pattern causes the match to end before
|
||||
it starts are not supported, and give rise to an error return.
|
||||
.P
|
||||
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
||||
\fBpcre2_match()\fP, except that the partial matching options are not
|
||||
|
@ -2769,8 +2771,9 @@ are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
|
|||
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
|
||||
errors in the replacement string, with more particular errors being
|
||||
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
|
||||
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
|
||||
PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all
|
||||
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found),
|
||||
PCRE2_BADSUBSTITUTION (syntax error in extended group substitution), and
|
||||
PCRE2_BADSUBPATTERN (the pattern match ended before it started). As for all
|
||||
PCRE2 errors, a text message that describes the error can be obtained by
|
||||
calling \fBpcre2_get_error_message()\fP.
|
||||
.
|
||||
|
@ -3066,6 +3069,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 03 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -240,6 +240,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
|
|
@ -240,6 +240,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
|
|
@ -170,8 +170,8 @@ static const char compile_error_texts[] =
|
|||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
/* 85 */
|
||||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
@ -247,7 +247,9 @@ static const char match_error_texts[] =
|
|||
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
||||
"bad escape sequence in replacement string\0"
|
||||
"expected closing curly bracket in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
/* 60 */
|
||||
"match with end before start is not supported\0"
|
||||
;
|
||||
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||
constructions. This requires the identification of unescaped : and }
|
||||
characters. This function scans for such. It must deal with nested ${
|
||||
constructions. The pointer to the text is updated, either to the required end
|
||||
constructions. The pointer to the text is updated, either to the required end
|
||||
character, or to where an error was detected.
|
||||
|
||||
Arguments:
|
||||
|
@ -107,7 +107,7 @@ for (; ptr < ptrend; ptr++)
|
|||
|
||||
else if (*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int erc;
|
||||
int erc;
|
||||
int errorcode = 0;
|
||||
uint32_t ch;
|
||||
|
||||
|
@ -279,10 +279,10 @@ do
|
|||
|
||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Any error other than no match returns the error code. No match when not
|
||||
doing the special after-empty-match global rematch, or when at the end of the
|
||||
|
@ -320,7 +320,14 @@ do
|
|||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. */
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0])
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
subs++;
|
||||
if (rc == 0) rc = ovector_count;
|
||||
|
@ -409,14 +416,14 @@ do
|
|||
next = *ptr;
|
||||
if (next < CHAR_0 || next > CHAR_9) break;
|
||||
group = group * 10 + next - CHAR_0;
|
||||
|
||||
|
||||
/* A check for a number greater than the hightest captured group
|
||||
is sufficient here; no need for a separate overflow check. */
|
||||
|
||||
|
||||
if (group > code->top_bracket)
|
||||
{
|
||||
rc = PCRE2_ERROR_NOSUBSTRING;
|
||||
goto PTREXIT;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -439,7 +446,7 @@ do
|
|||
|
||||
if (inparens)
|
||||
{
|
||||
|
||||
|
||||
if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
|
||||
{
|
||||
special = *(++ptr);
|
||||
|
@ -501,8 +508,8 @@ do
|
|||
else
|
||||
{
|
||||
PCRE2_SPTR subptr, subptrend;
|
||||
|
||||
/* Find a number for a named group. In case there are duplicate names,
|
||||
|
||||
/* Find a number for a named group. In case there are duplicate names,
|
||||
search for the first one that is set. */
|
||||
|
||||
if (group < 0)
|
||||
|
@ -516,18 +523,18 @@ do
|
|||
if (ng < ovector_count)
|
||||
{
|
||||
if (group < 0) group = ng; /* First in ovector */
|
||||
if (ovector[ng*2] != PCRE2_UNSET)
|
||||
if (ovector[ng*2] != PCRE2_UNSET)
|
||||
{
|
||||
group = ng; /* First that is set */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If group is still negative, it means we did not find a group that
|
||||
|
||||
/* If group is still negative, it means we did not find a group that
|
||||
is in the ovector. Just set the first group. */
|
||||
|
||||
if (group < 0) group = GET2(first, 0);
|
||||
|
||||
if (group < 0) group = GET2(first, 0);
|
||||
}
|
||||
|
||||
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||
|
|
|
@ -4596,4 +4596,7 @@ B)x/alt_verbnames,mark
|
|||
|
||||
/(?<!a{65535})x/I
|
||||
|
||||
/(?=a\K)/replace=z
|
||||
BaCaD
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -14690,4 +14690,8 @@ Max lookbehind = 65535
|
|||
First code unit = 'x'
|
||||
Subject length lower bound = 1
|
||||
|
||||
/(?=a\K)/replace=z
|
||||
BaCaD
|
||||
Failed: error -60: match with end before start is not supported
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue