Fix bad behaviour for subroutine call in lookbehind when the called subroutine
contained an option setting such as (?s) and PCRE2_ANCHORED was set.
This commit is contained in:
parent
2aec84e37e
commit
4c69f50e69
|
@ -89,6 +89,12 @@ copied).
|
|||
|
||||
(k) If parsing a pattern exactly filled the buffer, the internal test for
|
||||
overrun did not check when the final META_END item was added.
|
||||
|
||||
(l) If a lookbehind contained a subroutine call, and the called group
|
||||
contained an option setting such as (?s), and the PCRE2_ANCHORED option
|
||||
was set, unpredictable behaviour could occur. The underlying bug was
|
||||
incorrect code and insufficient checking while searching for the end of
|
||||
the called subroutine in the parsed pattern.
|
||||
|
||||
4. Back references are now permitted in lookbehind assertions when there are
|
||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||
|
|
10
HACKING
10
HACKING
|
@ -240,8 +240,8 @@ META_RECURSE is always followed by an offset, for use in error messages.
|
|||
META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
|
||||
element contains the 16-bit type and data property values, packed together.
|
||||
ESC_g and ESC_k are used only for named references - numerical ones are turned
|
||||
into META_RECURSE or META_BACKREF as appropriate. They are followed by a length
|
||||
and an offset into the pattern to specify the name.
|
||||
into META_RECURSE or META_BACKREF as appropriate. ESC_g and ESC_k are followed
|
||||
by a length and an offset into the pattern to specify the name.
|
||||
|
||||
The following have one data item that follows in the next vector element:
|
||||
|
||||
|
@ -279,10 +279,6 @@ The following is followed just by an offset, for use in error messages:
|
|||
|
||||
META_COND_DEFINE (?(DEFINE)
|
||||
|
||||
In fact, META_COND_ASSERT is used for any group starting (?( that does not
|
||||
match any of the other META_COND cases. The check that this group is an
|
||||
assertion (optionally preceded by a callout) happens at compile time.
|
||||
|
||||
The following are also followed just by an offset, but also the lower 16 bits
|
||||
of the main word contain the length of the first branch of the lookbehind
|
||||
group; this is used when generating OP_REVERSE for that branch.
|
||||
|
@ -799,4 +795,4 @@ not a real opcode, but is used to check that tables indexed by opcode are the
|
|||
correct length, in order to catch updating errors.
|
||||
|
||||
Philip Hazel
|
||||
October 2016
|
||||
November 2016
|
||||
|
|
2
RunTest
2
RunTest
|
@ -502,7 +502,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190 >>testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||
checkresult $? 2 "$opt"
|
||||
else
|
||||
echo " "
|
||||
|
|
|
@ -198,7 +198,10 @@ don't have to check them every time. */
|
|||
unsigned ints. Values less than META_END are literal data values. The coding
|
||||
for identifying the item is in the top 16-bits, leaving 16 bits for the
|
||||
additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
|
||||
macros are used to manipulate parsed pattern elements. */
|
||||
macros are used to manipulate parsed pattern elements.
|
||||
|
||||
NOTE: When these definitions are changed, the table of extra lengths for each
|
||||
code (meta_extra_lengths, just below) must be updated to remain in step. */
|
||||
|
||||
#define META_END 0x80000000u /* End of pattern */
|
||||
|
||||
|
@ -277,6 +280,73 @@ versions. */
|
|||
#define META_FIRST_QUANTIFIER META_ASTERISK
|
||||
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
|
||||
|
||||
/* Table of extra lengths for each of the meta codes. Must be kept in step with
|
||||
the definitions above. For some items these values are a basic length to which
|
||||
a variable amount has to be added. */
|
||||
|
||||
static unsigned char meta_extra_lengths[] = {
|
||||
0, /* META_END */
|
||||
0, /* META_ALT */
|
||||
0, /* META_ATOMIC */
|
||||
0, /* META_BACKREF - more if group is >= 10 */
|
||||
1+SIZEOFFSET, /* META_BACKREF_BYNAME */
|
||||
1, /* META_BIGVALUE */
|
||||
3, /* META_CALLOUT_NUMBER */
|
||||
3+SIZEOFFSET, /* META_CALLOUT_STRING */
|
||||
0, /* META_CAPTURE */
|
||||
0, /* META_CIRCUMFLEX */
|
||||
0, /* META_CLASS */
|
||||
0, /* META_CLASS_EMPTY */
|
||||
0, /* META_CLASS_EMPTY_NOT */
|
||||
0, /* META_CLASS_END */
|
||||
0, /* META_CLASS_NOT */
|
||||
0, /* META_COND_ASSERT */
|
||||
SIZEOFFSET, /* META_COND_DEFINE */
|
||||
1+SIZEOFFSET, /* META_COND_NAME */
|
||||
1+SIZEOFFSET, /* META_COND_NUMBER */
|
||||
1+SIZEOFFSET, /* META_COND_RNAME */
|
||||
1+SIZEOFFSET, /* META_COND_RNUMBER */
|
||||
3, /* META_COND_VERSION */
|
||||
0, /* META_DOLLAR */
|
||||
0, /* META_DOT */
|
||||
0, /* META_ESCAPE - more for ESC_P, ESC_p, ESC_g, ESC_k */
|
||||
0, /* META_KET */
|
||||
0, /* META_NOCAPTURE */
|
||||
1, /* META_OPTIONS */
|
||||
1, /* META_POSIX */
|
||||
1, /* META_POSIX_NEG */
|
||||
0, /* META_RANGE_ESCAPED */
|
||||
0, /* META_RANGE_LITERAL */
|
||||
SIZEOFFSET, /* META_RECURSE */
|
||||
1+SIZEOFFSET, /* META_RECURSE_BYNAME */
|
||||
0, /* META_LOOKAHEAD */
|
||||
0, /* META_LOOKAHEADNOT */
|
||||
SIZEOFFSET, /* META_LOOKBEHIND */
|
||||
SIZEOFFSET, /* META_LOOKBEHINDNOT */
|
||||
1, /* META_MARK - plus the string length */
|
||||
0, /* META_ACCEPT */
|
||||
0, /* META_COMMIT */
|
||||
0, /* META_FAIL */
|
||||
0, /* META_PRUNE */
|
||||
1, /* META_PRUNE_ARG - plus the string length */
|
||||
0, /* META_SKIP */
|
||||
1, /* META_SKIP_ARG - plus the string length */
|
||||
0, /* META_THEN */
|
||||
1, /* META_THEN_ARG - plus the string length */
|
||||
0, /* META_ASTERISK */
|
||||
0, /* META_ASTERISK_PLUS */
|
||||
0, /* META_ASTERISK_QUERY */
|
||||
0, /* META_PLUS */
|
||||
0, /* META_PLUS_PLUS */
|
||||
0, /* META_PLUS_QUERY */
|
||||
0, /* META_QUERY */
|
||||
0, /* META_QUERY_PLUS */
|
||||
0, /* META_QUERY_QUERY */
|
||||
2, /* META_MINMAX */
|
||||
2, /* META_MINMAX_PLUS */
|
||||
2 /* META_MINMAX_QUERY */
|
||||
};
|
||||
|
||||
/* Types for skipping parts of a parsed pattern. */
|
||||
|
||||
enum { PSKIP_ALT, PSKIP_CLASS, PSKIP_KET };
|
||||
|
@ -318,8 +388,8 @@ comparison). */
|
|||
locale, and may mark arbitrary characters as digits. We want to recognize only
|
||||
0-9, a-z, and A-Z as hex digits, which is why we have a private table here. It
|
||||
costs 256 bytes, but it is a lot faster than doing character value tests (at
|
||||
least in some simple cases I timed), and in some applications one wants PCRE to
|
||||
compile efficiently as well as match efficiently. The value in the table is
|
||||
least in some simple cases I timed), and in some applications one wants PCRE2
|
||||
to compile efficiently as well as match efficiently. The value in the table is
|
||||
the binary hex digit value, or 0xff for non-hex digits. */
|
||||
|
||||
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
|
||||
|
@ -646,7 +716,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89 };
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90 };
|
||||
|
||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||
|
@ -1036,7 +1106,7 @@ if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
|||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||
(*ref_count)++;
|
||||
}
|
||||
|
||||
|
||||
return newcode;
|
||||
}
|
||||
|
||||
|
@ -5325,7 +5395,7 @@ for (;; pptr++)
|
|||
|
||||
/* Any other non-literal must be an escape */
|
||||
|
||||
if (meta > META_END)
|
||||
if (meta >= META_END)
|
||||
{
|
||||
if (META_CODE(meta) != META_ESCAPE)
|
||||
{
|
||||
|
@ -8240,6 +8310,7 @@ Arguments:
|
|||
PSKIP_KET when only META_KET ends the skip
|
||||
|
||||
Returns: new value of pptr
|
||||
NULL if META_END is reached - should never occur
|
||||
*/
|
||||
|
||||
static uint32_t *
|
||||
|
@ -8249,12 +8320,47 @@ uint32_t nestlevel = 0;
|
|||
|
||||
for (pptr += 1;; pptr++)
|
||||
{
|
||||
switch(META_CODE(*pptr))
|
||||
uint32_t meta = META_CODE(*pptr);
|
||||
switch(meta)
|
||||
{
|
||||
case META_BIGVALUE:
|
||||
pptr += 1;
|
||||
default: /* Just skip over most items */
|
||||
break;
|
||||
|
||||
/* This should never occur. */
|
||||
|
||||
case META_END:
|
||||
return NULL;
|
||||
|
||||
/* The data for these items is variable in length. */
|
||||
|
||||
case META_BACKREF: /* Offset is present only if group >= 10 */
|
||||
if (META_DATA(*pptr) >= 10) pptr += SIZEOFFSET;
|
||||
break;
|
||||
|
||||
case META_ESCAPE: /* A few escapes are followed by data items. */
|
||||
switch (META_DATA(*pptr))
|
||||
{
|
||||
case ESC_P:
|
||||
case ESC_p:
|
||||
pptr += 1;
|
||||
break;
|
||||
|
||||
case ESC_g:
|
||||
case ESC_k:
|
||||
pptr += 1 + SIZEOFFSET;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case META_MARK: /* Add the length of the name. */
|
||||
case META_PRUNE_ARG:
|
||||
case META_SKIP_ARG:
|
||||
case META_THEN_ARG:
|
||||
pptr += pptr[1];
|
||||
break;
|
||||
|
||||
/* These are the "active" items in this loop. */
|
||||
|
||||
case META_CLASS_END:
|
||||
if (skiptype == PSKIP_CLASS) return pptr;
|
||||
break;
|
||||
|
@ -8284,10 +8390,13 @@ for (pptr += 1;; pptr++)
|
|||
if (nestlevel == 0) return pptr;
|
||||
nestlevel--;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* The extra data item length for each meta is in a table. */
|
||||
|
||||
meta = (meta & 0x0fff0000u) >> 16;
|
||||
if (meta >= sizeof(meta_extra_lengths)) return NULL;
|
||||
pptr += meta_extra_lengths[meta];
|
||||
}
|
||||
/* Control never reaches here */
|
||||
return pptr;
|
||||
|
@ -8427,6 +8536,7 @@ for (;; pptr++)
|
|||
case META_ACCEPT:
|
||||
case META_FAIL:
|
||||
pptr = parsed_skip(pptr, PSKIP_ALT);
|
||||
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
||||
goto EXIT;
|
||||
|
||||
case META_MARK:
|
||||
|
@ -8457,6 +8567,7 @@ for (;; pptr++)
|
|||
case META_CLASS_NOT:
|
||||
itemlength = 1;
|
||||
pptr = parsed_skip(pptr, PSKIP_CLASS);
|
||||
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
||||
break;
|
||||
|
||||
case META_CLASS_EMPTY_NOT:
|
||||
|
@ -8498,6 +8609,7 @@ for (;; pptr++)
|
|||
case META_LOOKAHEAD:
|
||||
case META_LOOKAHEADNOT:
|
||||
pptr = parsed_skip(pptr, PSKIP_KET);
|
||||
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
||||
break;
|
||||
|
||||
/* Lookbehinds can be ignored, but must themselves be checked. */
|
||||
|
@ -8595,6 +8707,7 @@ for (;; pptr++)
|
|||
}
|
||||
|
||||
gptrend = parsed_skip(gptr, PSKIP_KET);
|
||||
if (gptrend == NULL) goto PARSED_SKIP_FAILED;
|
||||
if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
|
||||
if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
|
||||
|
@ -8685,6 +8798,10 @@ EXIT:
|
|||
*pptrptr = pptr;
|
||||
if (branchlength > cb->max_lookbehind) cb->max_lookbehind = branchlength;
|
||||
return branchlength;
|
||||
|
||||
PARSED_SKIP_FAILED:
|
||||
*errcodeptr = ERR90;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -175,6 +175,7 @@ static const unsigned char compile_error_texts[] =
|
|||
"pattern string is longer than the limit set by the application\0"
|
||||
"internal error: unknown code in parsed pattern\0"
|
||||
/* 90 */
|
||||
"internal error: bad code value in parsed_skip()\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -4916,4 +4916,6 @@ a)"xI
|
|||
|
||||
/(?(?(?(?(?(?))))))/
|
||||
|
||||
/(?<=(?1))((?s))/anchored
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -15367,6 +15367,8 @@ Failed: error 128 at offset 6: assertion expected after (?( or (?(?C)
|
|||
/(?(?(?(?(?(?))))))/
|
||||
Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
|
||||
|
||||
/(?<=(?1))((?s))/anchored
|
||||
|
||||
# End of testinput2
|
||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
@ -15376,4 +15378,5 @@ Error 0: PCRE2_ERROR_BADDATA (unknown error number)
|
|||
Error 100: no error
|
||||
Error 188: pattern string is longer than the limit set by the application
|
||||
Error 189: internal error: unknown code in parsed pattern
|
||||
Error 190: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error 190: internal error: bad code value in parsed_skip()
|
||||
Error 191: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
|
|
Loading…
Reference in New Issue