Re-do previous patch and fix new forward-reference-with-quantification bugs.
This commit is contained in:
parent
c420d11041
commit
e653c5f142
|
@ -106,6 +106,11 @@ subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to
|
|||
compile correct code, leading to undefined behaviour or an internally detected
|
||||
error. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
27. Quantification of certain items (e.g. atomic back references) could cause
|
||||
incorrect code to be compiled when recursive forward references were involved.
|
||||
An example is the pattern /(?1)()((((((\1++))\x85)+)|))/. This bug was
|
||||
discovered by the LLVM fuzzer.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
|
|
@ -49,6 +49,17 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
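/* In rare error cases debugging might require calling pcre2_printint(). To
enable this, change the "#if 0" below to "#if 1": that includes
pcre2_printint.c and defines CALL_PRINTINT, which activates the call. */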
|
||||
|
||||
#if 0
|
||||
#ifdef EBCDIC
|
||||
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
|
||||
#else
|
||||
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
|
||||
#endif
|
||||
#include "pcre2_printint.c"
|
||||
#define CALL_PRINTINT
|
||||
#endif
|
||||
|
||||
/* There are a few things that vary with different code unit sizes. Handle them
|
||||
by defining macros in order to minimize #if usage. */
|
||||
|
@ -3088,7 +3099,6 @@ Arguments:
|
|||
reqcuflagsptr place to put the last required code unit flags, or a negative number
|
||||
bcptr points to current branch chain
|
||||
cond_depth conditional nesting depth
|
||||
save_hwm_offset high water mark for the start of the group
|
||||
cb contains pointers to tables etc.
|
||||
lengthptr NULL during the real compile phase
|
||||
points to length accumulator during pre-compile phase
|
||||
|
@ -3103,7 +3113,6 @@ compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr,
|
|||
uint32_t *firstcuptr, int32_t *firstcuflagsptr,
|
||||
uint32_t *reqcuptr, int32_t *reqcuflagsptr,
|
||||
branch_chain *bcptr, int cond_depth,
|
||||
size_t save_hwm_offset,
|
||||
compile_block *cb, size_t *lengthptr)
|
||||
{
|
||||
int repeat_min = 0, repeat_max = 0; /* To please picky compilers */
|
||||
|
@ -3119,6 +3128,7 @@ int32_t req_caseopt, reqvary, tempreqvary;
|
|||
int after_manual_callout = 0;
|
||||
int escape;
|
||||
size_t length_prevgroup = 0;
|
||||
size_t item_hwm_offset = 0;
|
||||
register uint32_t c;
|
||||
register PCRE2_UCHAR *code = *codeptr;
|
||||
PCRE2_UCHAR *last_code = code;
|
||||
|
@ -3425,6 +3435,7 @@ for (;; ptr++)
|
|||
zeroreqcu = reqcu;
|
||||
zeroreqcuflags = reqcuflags;
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
*code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY;
|
||||
break;
|
||||
|
||||
|
@ -3471,6 +3482,7 @@ for (;; ptr++)
|
|||
/* Handle a real character class. */
|
||||
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
|
||||
/* PCRE supports POSIX class stuff inside a class. Perl gives an error if
|
||||
they are encountered at the top level, so we'll do that too. */
|
||||
|
@ -4540,7 +4552,7 @@ for (;; ptr++)
|
|||
{
|
||||
register int i;
|
||||
int len = (int)(code - previous);
|
||||
size_t base_hwm_offset = save_hwm_offset;
|
||||
size_t base_hwm_offset = item_hwm_offset;
|
||||
PCRE2_UCHAR *bralink = NULL;
|
||||
PCRE2_UCHAR *brazeroptr = NULL;
|
||||
|
||||
|
@ -4597,7 +4609,7 @@ for (;; ptr++)
|
|||
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
|
||||
{
|
||||
*code = OP_END;
|
||||
adjust_recurse(previous, 1, utf, cb, save_hwm_offset);
|
||||
adjust_recurse(previous, 1, utf, cb, item_hwm_offset);
|
||||
memmove(previous + 1, previous, CU2BYTES(len));
|
||||
code++;
|
||||
if (repeat_max == 0)
|
||||
|
@ -4621,7 +4633,7 @@ for (;; ptr++)
|
|||
{
|
||||
int offset;
|
||||
*code = OP_END;
|
||||
adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, save_hwm_offset);
|
||||
adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, item_hwm_offset);
|
||||
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
|
||||
code += 2 + LINK_SIZE;
|
||||
*previous++ = OP_BRAZERO + repeat_type;
|
||||
|
@ -4879,7 +4891,7 @@ for (;; ptr++)
|
|||
{
|
||||
int nlen = (int)(code - bracode);
|
||||
*code = OP_END;
|
||||
adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
|
||||
adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, item_hwm_offset);
|
||||
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
|
||||
code += 1 + LINK_SIZE;
|
||||
nlen += 1 + LINK_SIZE;
|
||||
|
@ -5014,7 +5026,7 @@ for (;; ptr++)
|
|||
else
|
||||
{
|
||||
*code = OP_END;
|
||||
adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
|
||||
adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, item_hwm_offset);
|
||||
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
|
||||
code += 1 + LINK_SIZE;
|
||||
len += 1 + LINK_SIZE;
|
||||
|
@ -5190,7 +5202,6 @@ for (;; ptr++)
|
|||
newoptions = options;
|
||||
skipunits = 0;
|
||||
bravalue = OP_CBRA;
|
||||
save_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
reset_bracount = FALSE;
|
||||
|
||||
/* Deal with the extended parentheses; all are introduced by '?', and the
|
||||
|
@ -6010,6 +6021,7 @@ for (;; ptr++)
|
|||
{
|
||||
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
*code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
|
||||
PUT2INC(code, 0, index);
|
||||
PUT2INC(code, 0, count);
|
||||
|
@ -6123,6 +6135,7 @@ for (;; ptr++)
|
|||
HANDLE_RECURSION:
|
||||
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
called = cb->start_code;
|
||||
|
||||
/* When we are actually compiling, find the bracket that is being
|
||||
|
@ -6324,7 +6337,11 @@ for (;; ptr++)
|
|||
previous = NULL;
|
||||
cb->iscondassert = FALSE;
|
||||
}
|
||||
else previous = code;
|
||||
else
|
||||
{
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
}
|
||||
|
||||
*code = bravalue;
|
||||
tempcode = code;
|
||||
|
@ -6574,9 +6591,6 @@ for (;; ptr++)
|
|||
PCRE2_SPTR p;
|
||||
uint32_t cf;
|
||||
|
||||
/* Normally save_hwm_offset is set when '(' is read */
|
||||
|
||||
save_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
|
||||
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
|
||||
|
||||
|
@ -6644,6 +6658,7 @@ for (;; ptr++)
|
|||
HANDLE_REFERENCE:
|
||||
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
*code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF;
|
||||
PUT2INC(code, 0, recno);
|
||||
cb->backref_map |= (recno < 32)? (1 << recno) : 1;
|
||||
|
@ -6673,6 +6688,7 @@ for (;; ptr++)
|
|||
if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb))
|
||||
goto FAILED;
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
*code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
|
||||
*code++ = ptype;
|
||||
*code++ = pdata;
|
||||
|
@ -6721,6 +6737,7 @@ for (;; ptr++)
|
|||
|
||||
{
|
||||
previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
*code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
|
||||
}
|
||||
}
|
||||
|
@ -6755,6 +6772,7 @@ for (;; ptr++)
|
|||
|
||||
ONE_CHAR:
|
||||
previous = code;
|
||||
item_hwm_offset = cb->hwm - cb->start_workspace;
|
||||
|
||||
/* For caseless UTF mode, check whether this character has more than one
|
||||
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
|
||||
|
@ -6980,7 +6998,7 @@ for (;;)
|
|||
|
||||
if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstcu,
|
||||
&branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc,
|
||||
cond_depth, save_hwm_offset, cb, (lengthptr == NULL)? NULL : &length))
|
||||
cond_depth, cb, (lengthptr == NULL)? NULL : &length))
|
||||
{
|
||||
*ptrptr = ptr;
|
||||
return FALSE;
|
||||
|
@ -7992,6 +8010,8 @@ if (cb.names_found > 0)
|
|||
error, errorcode will be set non-zero, so we don't need to look at the result
|
||||
of the function here. */
|
||||
|
||||
/* fprintf(stderr, "+++\n\nPASS TWO\n"); */
|
||||
|
||||
ptr = pattern + skipatstart;
|
||||
code = (PCRE2_UCHAR *)codestart;
|
||||
*code = OP_BRA;
|
||||
|
@ -8026,6 +8046,13 @@ if (usedlength > length) errorcode = ERR23; else
|
|||
#endif
|
||||
}
|
||||
|
||||
/* In rare debugging situations we sometimes need to look at the compiled code
|
||||
at this stage. */
|
||||
|
||||
#ifdef CALL_PRINTINT
|
||||
pcre2_printint(re, stderr, TRUE);
|
||||
#endif
|
||||
|
||||
/* Fill in any forward references that are required. There may be repeated
|
||||
references; optimize for them, as searching a large regex takes time. The
|
||||
test of errorcode inside the loop means that nothing is done if it is already
|
||||
|
@ -8041,6 +8068,9 @@ if (cb.hwm > cb.start_workspace)
|
|||
cb.hwm -= LINK_SIZE;
|
||||
offset = GET(cb.hwm, 0);
|
||||
recno = GET(codestart, offset);
|
||||
|
||||
/* fprintf(stderr, "+++offset=%d recno=%d\n", offset, recno); */
|
||||
|
||||
if (recno != prev_recno)
|
||||
{
|
||||
groupptr = PRIV(find_bracket)(codestart, utf, recno);
|
||||
|
|
|
@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
that comprise the library. */
|
||||
that comprise the library. It can also optionally be included in
|
||||
pcre2_compile.c for detailed debugging in error situations. */
|
||||
|
||||
|
||||
/* Tables of operator names. The same 8-bit table is used for all code unit
|
||||
|
@ -138,9 +139,9 @@ if ((c & 0xc0) != 0xc0)
|
|||
else
|
||||
{
|
||||
int i;
|
||||
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & utf8_table3[a]) << s;
|
||||
c = (c & PRIV(utf8_table3)[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
|
@ -223,12 +224,11 @@ get_ucpname(unsigned int ptype, unsigned int pvalue)
|
|||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
int i;
|
||||
for (i = utt_size - 1; i >= 0; i--)
|
||||
for (i = PRIV(utt_size) - 1; i >= 0; i--)
|
||||
{
|
||||
if (ptype == utt[i].type && pvalue == utt[i].value) break;
|
||||
if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
|
||||
}
|
||||
return (i >= 0)? utt_names + utt[i].name_offset : "??";
|
||||
|
||||
return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
|
||||
#else /* No UTF support */
|
||||
(void)ptype;
|
||||
(void)pvalue;
|
||||
|
@ -266,7 +266,7 @@ if (code[1] != PT_CLIST)
|
|||
else
|
||||
{
|
||||
const char *not = (*code == OP_PROP)? "" : "not ";
|
||||
const uint32_t *p = ucd_caseless_sets + code[2];
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
|
||||
fprintf (f, "%s%sclist", before, not);
|
||||
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
|
||||
fprintf(f, "%s", after);
|
||||
|
@ -286,7 +286,7 @@ bytecode can be written that do not depend on the value of LINK_SIZE.
|
|||
Arguments:
|
||||
re a compiled pattern
|
||||
f the file to write to
|
||||
print_lenghts show various lengths
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
|
|
@ -5721,4 +5721,7 @@ name)/mark
|
|||
/A[\8]B[\9]C/
|
||||
A8B9C
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
\x85\x85
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -4294,6 +4294,8 @@ a random value. /Ix
|
|||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/B
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
/(\9*+(?2);\3++()2|)++{/
|
||||
|
||||
/\V\x85\9*+((?2)\3++()2)*:2/
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -146,4 +146,6 @@
|
|||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
|
||||
# End of testinput8
|
||||
|
|
|
@ -9447,4 +9447,15 @@ No match
|
|||
A8B9C
|
||||
0: A8B9C
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
\x85\x85
|
||||
0: \x85\x85
|
||||
1:
|
||||
2: \x85\x85
|
||||
3: \x85\x85
|
||||
4: \x85\x85
|
||||
5: \x85
|
||||
6:
|
||||
7:
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -14391,6 +14391,10 @@ Failed: error 115 at offset 7: reference to non-existent subpattern
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
/(\9*+(?2);\3++()2|)++{/
|
||||
Failed: error 115 at offset 22: reference to non-existent subpattern
|
||||
|
||||
/\V\x85\9*+((?2)\3++()2)*:2/
|
||||
Failed: error 115 at offset 26: reference to non-existent subpattern
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -813,4 +813,31 @@ Memory allocation (code space): 14
|
|||
37 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
------------------------------------------------------------------
|
||||
0 50 Bra
|
||||
2 4 Recurse
|
||||
4 3 CBra 1
|
||||
7 3 Ket
|
||||
9 39 CBra 2
|
||||
12 32 CBra 3
|
||||
15 27 CBra 4
|
||||
18 22 CBra 5
|
||||
21 15 CBra 6
|
||||
24 10 CBra 7
|
||||
27 5 Once
|
||||
29 \1+
|
||||
32 5 Ket
|
||||
34 10 Ket
|
||||
36 15 Ket
|
||||
38 \x{85}
|
||||
40 22 KetRmax
|
||||
42 27 Ket
|
||||
44 2 Alt
|
||||
46 34 Ket
|
||||
48 39 Ket
|
||||
50 50 Ket
|
||||
52 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
||||
|
|
|
@ -813,4 +813,31 @@ Memory allocation (code space): 28
|
|||
37 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
------------------------------------------------------------------
|
||||
0 50 Bra
|
||||
2 4 Recurse
|
||||
4 3 CBra 1
|
||||
7 3 Ket
|
||||
9 39 CBra 2
|
||||
12 32 CBra 3
|
||||
15 27 CBra 4
|
||||
18 22 CBra 5
|
||||
21 15 CBra 6
|
||||
24 10 CBra 7
|
||||
27 5 Once
|
||||
29 \1+
|
||||
32 5 Ket
|
||||
34 10 Ket
|
||||
36 15 Ket
|
||||
38 \x{85}
|
||||
40 22 KetRmax
|
||||
42 27 Ket
|
||||
44 2 Alt
|
||||
46 34 Ket
|
||||
48 39 Ket
|
||||
50 50 Ket
|
||||
52 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
||||
|
|
|
@ -813,4 +813,31 @@ Memory allocation (code space): 10
|
|||
56 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?1)()((((((\1++))\x85)+)|))/
|
||||
------------------------------------------------------------------
|
||||
0 77 Bra
|
||||
3 6 Recurse
|
||||
6 5 CBra 1
|
||||
11 5 Ket
|
||||
14 60 CBra 2
|
||||
19 49 CBra 3
|
||||
24 41 CBra 4
|
||||
29 33 CBra 5
|
||||
34 23 CBra 6
|
||||
39 15 CBra 7
|
||||
44 7 Once
|
||||
47 \1+
|
||||
51 7 Ket
|
||||
54 15 Ket
|
||||
57 23 Ket
|
||||
60 \x{85}
|
||||
62 33 KetRmax
|
||||
65 41 Ket
|
||||
68 3 Alt
|
||||
71 52 Ket
|
||||
74 60 Ket
|
||||
77 77 Ket
|
||||
80 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
||||
|
|
Loading…
Reference in New Issue