Forbid \K patterns that end before they start in pcre2_substitute().

This commit is contained in:
Philip.Hazel 2015-11-03 17:38:00 +00:00
parent 75181cca2e
commit 2caf22dc61
8 changed files with 49 additions and 25 deletions

View File

@ -257,6 +257,9 @@ as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
74. Give an error if a lookbehind assertion is longer than 65535 code units. 74. Give an error if a lookbehind assertion is longer than 65535 code units.
75. Give an error in pcre2_substitute() if a match ends before it starts (as a
result of the use of \K).
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21" .TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.sp .sp
@ -2666,7 +2666,9 @@ same number causes an error at compile time.
This function calls \fBpcre2_match()\fP and then makes a copy of the subject This function calls \fBpcre2_match()\fP and then makes a copy of the subject
string in \fIoutputbuffer\fP, replacing the part that was matched with the string in \fIoutputbuffer\fP, replacing the part that was matched with the
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
which a \eK item in a lookahead in the pattern causes the match to end before
it starts are not supported, and give rise to an error return.
.P .P
The first seven arguments of \fBpcre2_substitute()\fP are the same as for The first seven arguments of \fBpcre2_substitute()\fP are the same as for
\fBpcre2_match()\fP, except that the partial matching options are not \fBpcre2_match()\fP, except that the partial matching options are not
@ -2769,8 +2771,9 @@ are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
errors in the replacement string, with more particular errors being errors in the replacement string, with more particular errors being
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket not found),
PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution), and
PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started). As for all
PCRE2 errors, a text message that describes the error can be obtained by PCRE2 errors, a text message that describes the error can be obtained by
calling \fBpcre2_get_error_message()\fP. calling \fBpcre2_get_error_message()\fP.
. .
@ -3066,6 +3069,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 16 October 2015 Last updated: 03 November 2015
Copyright (c) 1997-2015 University of Cambridge. Copyright (c) 1997-2015 University of Cambridge.
.fi .fi

View File

@ -240,6 +240,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_BADREPESCAPE (-57) #define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58) #define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59) #define PCRE2_ERROR_BADSUBSTITUTION (-59)
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */

View File

@ -240,6 +240,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_BADREPESCAPE (-57) #define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58) #define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59) #define PCRE2_ERROR_BADSUBSTITUTION (-59)
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */

View File

@ -170,8 +170,8 @@ static const char compile_error_texts[] =
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0" "(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
/* 85 */ /* 85 */
"using \\C is disabled in this PCRE2 library\0" "using \\C is disabled in this PCRE2 library\0"
"regular expression is too complicated\0" "regular expression is too complicated\0"
"lookbehind assertion is too long\0" "lookbehind assertion is too long\0"
; ;
/* Match-time and UTF error texts are in the same format. */ /* Match-time and UTF error texts are in the same format. */
@ -247,7 +247,9 @@ static const char match_error_texts[] =
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0" "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
"bad escape sequence in replacement string\0" "bad escape sequence in replacement string\0"
"expected closing curly bracket in replacement string\0" "expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0" "bad substitution in replacement string\0"
/* 60 */
"match with end before start is not supported\0"
; ;

View File

@ -55,7 +55,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* In extended mode, we recognize ${name:+set text:unset text} and similar /* In extended mode, we recognize ${name:+set text:unset text} and similar
constructions. This requires the identification of unescaped : and } constructions. This requires the identification of unescaped : and }
characters. This function scans for such. It must deal with nested ${ characters. This function scans for such. It must deal with nested ${
constructions. The pointer to the text is updated, either to the required end constructions. The pointer to the text is updated, either to the required end
character, or to where an error was detected. character, or to where an error was detected.
Arguments: Arguments:
@ -107,7 +107,7 @@ for (; ptr < ptrend; ptr++)
else if (*ptr == CHAR_BACKSLASH) else if (*ptr == CHAR_BACKSLASH)
{ {
int erc; int erc;
int errorcode = 0; int errorcode = 0;
uint32_t ch; uint32_t ch;
@ -279,10 +279,10 @@ do
rc = pcre2_match(code, subject, length, start_offset, options|goptions, rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext); match_data, mcontext);
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */ if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
#endif #endif
/* Any error other than no match returns the error code. No match when not /* Any error other than no match returns the error code. No match when not
doing the special after-empty-match global rematch, or when at the end of the doing the special after-empty-match global rematch, or when at the end of the
@ -320,7 +320,14 @@ do
continue; continue;
} }
/* Handle a successful match. */ /* Handle a successful match. Matches that use \K to end before they start
are not supported. */
if (ovector[1] < ovector[0])
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
subs++; subs++;
if (rc == 0) rc = ovector_count; if (rc == 0) rc = ovector_count;
@ -409,14 +416,14 @@ do
next = *ptr; next = *ptr;
if (next < CHAR_0 || next > CHAR_9) break; if (next < CHAR_0 || next > CHAR_9) break;
group = group * 10 + next - CHAR_0; group = group * 10 + next - CHAR_0;
/* A check for a number greater than the hightest captured group /* A check for a number greater than the highest captured group
is sufficient here; no need for a separate overflow check. */ is sufficient here; no need for a separate overflow check. */
if (group > code->top_bracket) if (group > code->top_bracket)
{ {
rc = PCRE2_ERROR_NOSUBSTRING; rc = PCRE2_ERROR_NOSUBSTRING;
goto PTREXIT; goto PTREXIT;
} }
} }
} }
@ -439,7 +446,7 @@ do
if (inparens) if (inparens)
{ {
if (extended && !star && ptr < repend - 2 && next == CHAR_COLON) if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
{ {
special = *(++ptr); special = *(++ptr);
@ -501,8 +508,8 @@ do
else else
{ {
PCRE2_SPTR subptr, subptrend; PCRE2_SPTR subptr, subptrend;
/* Find a number for a named group. In case there are duplicate names, /* Find a number for a named group. In case there are duplicate names,
search for the first one that is set. */ search for the first one that is set. */
if (group < 0) if (group < 0)
@ -516,18 +523,18 @@ do
if (ng < ovector_count) if (ng < ovector_count)
{ {
if (group < 0) group = ng; /* First in ovector */ if (group < 0) group = ng; /* First in ovector */
if (ovector[ng*2] != PCRE2_UNSET) if (ovector[ng*2] != PCRE2_UNSET)
{ {
group = ng; /* First that is set */ group = ng; /* First that is set */
break; break;
} }
} }
} }
/* If group is still negative, it means we did not find a group that /* If group is still negative, it means we did not find a group that
is in the ovector. Just set the first group. */ is in the ovector. Just set the first group. */
if (group < 0) group = GET2(first, 0); if (group < 0) group = GET2(first, 0);
} }
rc = pcre2_substring_length_bynumber(match_data, group, &sublength); rc = pcre2_substring_length_bynumber(match_data, group, &sublength);

3
testdata/testinput2 vendored
View File

@ -4596,4 +4596,7 @@ B)x/alt_verbnames,mark
/(?<!a{65535})x/I /(?<!a{65535})x/I
/(?=a\K)/replace=z
BaCaD
# End of testinput2 # End of testinput2

View File

@ -14690,4 +14690,8 @@ Max lookbehind = 65535
First code unit = 'x' First code unit = 'x'
Subject length lower bound = 1 Subject length lower bound = 1
/(?=a\K)/replace=z
BaCaD
Failed: error -60: match with end before start is not supported
# End of testinput2 # End of testinput2