Forbid \K patterns that end before they start in pcre2_substitute().
This commit is contained in:
parent
75181cca2e
commit
2caf22dc61
|
@ -257,6 +257,9 @@ as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
|
||||||
|
|
||||||
74. Give an error if a lookbehind assertion is longer than 65535 code units.
|
74. Give an error if a lookbehind assertion is longer than 65535 code units.
|
||||||
|
|
||||||
|
75. Give an error in pcre2_substitute() if a match ends before it starts (as a
|
||||||
|
result of the use of \K).
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21"
|
.TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -2666,7 +2666,9 @@ same number causes an error at compile time.
|
||||||
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
||||||
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
||||||
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
|
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
|
||||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
||||||
|
which a \eK item in a lookahead in the pattern causes the match to end before
|
||||||
|
it starts are not supported, and give rise to an error return.
|
||||||
.P
|
.P
|
||||||
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
||||||
\fBpcre2_match()\fP, except that the partial matching options are not
|
\fBpcre2_match()\fP, except that the partial matching options are not
|
||||||
|
@ -2769,8 +2771,9 @@ are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
|
||||||
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
|
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
|
||||||
errors in the replacement string, with more particular errors being
|
errors in the replacement string, with more particular errors being
|
||||||
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
|
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
|
||||||
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
|
PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket not found),
|
||||||
PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all
|
PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution), and
|
||||||
|
PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started). As for all
|
||||||
PCRE2 errors, a text message that describes the error can be obtained by
|
PCRE2 errors, a text message that describes the error can be obtained by
|
||||||
calling \fBpcre2_get_error_message()\fP.
|
calling \fBpcre2_get_error_message()\fP.
|
||||||
.
|
.
|
||||||
|
@ -3066,6 +3069,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 16 October 2015
|
Last updated: 03 November 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -240,6 +240,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||||
|
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
|
|
@ -240,6 +240,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||||
|
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
|
|
@ -170,8 +170,8 @@ static const char compile_error_texts[] =
|
||||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||||
/* 85 */
|
/* 85 */
|
||||||
"using \\C is disabled in this PCRE2 library\0"
|
"using \\C is disabled in this PCRE2 library\0"
|
||||||
"regular expression is too complicated\0"
|
"regular expression is too complicated\0"
|
||||||
"lookbehind assertion is too long\0"
|
"lookbehind assertion is too long\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
@ -247,7 +247,9 @@ static const char match_error_texts[] =
|
||||||
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
||||||
"bad escape sequence in replacement string\0"
|
"bad escape sequence in replacement string\0"
|
||||||
"expected closing curly bracket in replacement string\0"
|
"expected closing curly bracket in replacement string\0"
|
||||||
"bad substitution in replacement string\0"
|
"bad substitution in replacement string\0"
|
||||||
|
/* 60 */
|
||||||
|
"match with end before start is not supported\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||||
constructions. This requires the identification of unescaped : and }
|
constructions. This requires the identification of unescaped : and }
|
||||||
characters. This function scans for such. It must deal with nested ${
|
characters. This function scans for such. It must deal with nested ${
|
||||||
constructions. The pointer to the text is updated, either to the required end
|
constructions. The pointer to the text is updated, either to the required end
|
||||||
character, or to where an error was detected.
|
character, or to where an error was detected.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -107,7 +107,7 @@ for (; ptr < ptrend; ptr++)
|
||||||
|
|
||||||
else if (*ptr == CHAR_BACKSLASH)
|
else if (*ptr == CHAR_BACKSLASH)
|
||||||
{
|
{
|
||||||
int erc;
|
int erc;
|
||||||
int errorcode = 0;
|
int errorcode = 0;
|
||||||
uint32_t ch;
|
uint32_t ch;
|
||||||
|
|
||||||
|
@ -279,10 +279,10 @@ do
|
||||||
|
|
||||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||||
match_data, mcontext);
|
match_data, mcontext);
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Any error other than no match returns the error code. No match when not
|
/* Any error other than no match returns the error code. No match when not
|
||||||
doing the special after-empty-match global rematch, or when at the end of the
|
doing the special after-empty-match global rematch, or when at the end of the
|
||||||
|
@ -320,7 +320,14 @@ do
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle a successful match. */
|
/* Handle a successful match. Matches that use \K to end before they start
|
||||||
|
are not supported. */
|
||||||
|
|
||||||
|
if (ovector[1] < ovector[0])
|
||||||
|
{
|
||||||
|
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
|
||||||
subs++;
|
subs++;
|
||||||
if (rc == 0) rc = ovector_count;
|
if (rc == 0) rc = ovector_count;
|
||||||
|
@ -409,14 +416,14 @@ do
|
||||||
next = *ptr;
|
next = *ptr;
|
||||||
if (next < CHAR_0 || next > CHAR_9) break;
|
if (next < CHAR_0 || next > CHAR_9) break;
|
||||||
group = group * 10 + next - CHAR_0;
|
group = group * 10 + next - CHAR_0;
|
||||||
|
|
||||||
/* A check for a number greater than the highest captured group
|
/* A check for a number greater than the highest captured group
|
||||||
is sufficient here; no need for a separate overflow check. */
|
is sufficient here; no need for a separate overflow check. */
|
||||||
|
|
||||||
if (group > code->top_bracket)
|
if (group > code->top_bracket)
|
||||||
{
|
{
|
||||||
rc = PCRE2_ERROR_NOSUBSTRING;
|
rc = PCRE2_ERROR_NOSUBSTRING;
|
||||||
goto PTREXIT;
|
goto PTREXIT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -439,7 +446,7 @@ do
|
||||||
|
|
||||||
if (inparens)
|
if (inparens)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
|
if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
|
||||||
{
|
{
|
||||||
special = *(++ptr);
|
special = *(++ptr);
|
||||||
|
@ -501,8 +508,8 @@ do
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PCRE2_SPTR subptr, subptrend;
|
PCRE2_SPTR subptr, subptrend;
|
||||||
|
|
||||||
/* Find a number for a named group. In case there are duplicate names,
|
/* Find a number for a named group. In case there are duplicate names,
|
||||||
search for the first one that is set. */
|
search for the first one that is set. */
|
||||||
|
|
||||||
if (group < 0)
|
if (group < 0)
|
||||||
|
@ -516,18 +523,18 @@ do
|
||||||
if (ng < ovector_count)
|
if (ng < ovector_count)
|
||||||
{
|
{
|
||||||
if (group < 0) group = ng; /* First in ovector */
|
if (group < 0) group = ng; /* First in ovector */
|
||||||
if (ovector[ng*2] != PCRE2_UNSET)
|
if (ovector[ng*2] != PCRE2_UNSET)
|
||||||
{
|
{
|
||||||
group = ng; /* First that is set */
|
group = ng; /* First that is set */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If group is still negative, it means we did not find a group that
|
/* If group is still negative, it means we did not find a group that
|
||||||
is in the ovector. Just set the first group. */
|
is in the ovector. Just set the first group. */
|
||||||
|
|
||||||
if (group < 0) group = GET2(first, 0);
|
if (group < 0) group = GET2(first, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||||
|
|
|
@ -4596,4 +4596,7 @@ B)x/alt_verbnames,mark
|
||||||
|
|
||||||
/(?<!a{65535})x/I
|
/(?<!a{65535})x/I
|
||||||
|
|
||||||
|
/(?=a\K)/replace=z
|
||||||
|
BaCaD
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -14690,4 +14690,8 @@ Max lookbehind = 65535
|
||||||
First code unit = 'x'
|
First code unit = 'x'
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/(?=a\K)/replace=z
|
||||||
|
BaCaD
|
||||||
|
Failed: error -60: match with end before start is not supported
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue