Re-do previous patch and fix new forward-reference-with-quantification bugs.

This commit is contained in:
Philip.Hazel 2015-05-06 16:51:25 +00:00
parent c420d11041
commit e653c5f142
11 changed files with 178 additions and 40 deletions

View File

@ -106,6 +106,11 @@ subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to
compile correct code, leading to undefined behaviour or an internally detected
error. This bug was discovered by the LLVM fuzzer.
27. Quantification of certain items (e.g. atomic back references) could cause
incorrect code to be compiled when recursive forward references were involved.
An example is the pattern /(?1)()((((((\1++))\x85)+)|))/. This bug was
discovered by the LLVM fuzzer.
Version 10.10 06-March-2015
---------------------------

View File

@ -49,6 +49,17 @@ POSSIBILITY OF SUCH DAMAGE.
#include "pcre2_internal.h"
/* In rare error cases debugging might require calling pcre2_printint(). */
/* Debug-only block, disabled by default; change "#if 0" to "#if 1" to compile
it in. */
#if 0
/* PRINTABLE(c) is given a different code range for EBCDIC and non-EBCDIC
builds. Presumably pcre2_printint.c uses it to decide which characters can be
shown literally - confirm against that file. */
#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif
/* Pull the printing code directly into this compilation unit. */
#include "pcre2_printint.c"
/* CALL_PRINTINT guards the pcre2_printint() call made later in this file. */
#define CALL_PRINTINT
#endif
/* There are a few things that vary with different code unit sizes. Handle them
by defining macros in order to minimize #if usage. */
@ -3088,7 +3099,6 @@ Arguments:
reqcuflagsptr place to put the last required code unit flags, or a negative number
bcptr points to current branch chain
cond_depth conditional nesting depth
save_hwm_offset high water mark for the start of the group
cb contains pointers to tables etc.
lengthptr NULL during the real compile phase
points to length accumulator during pre-compile phase
@ -3103,7 +3113,6 @@ compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr,
uint32_t *firstcuptr, int32_t *firstcuflagsptr,
uint32_t *reqcuptr, int32_t *reqcuflagsptr,
branch_chain *bcptr, int cond_depth,
size_t save_hwm_offset,
compile_block *cb, size_t *lengthptr)
{
int repeat_min = 0, repeat_max = 0; /* To please picky compilers */
@ -3119,6 +3128,7 @@ int32_t req_caseopt, reqvary, tempreqvary;
int after_manual_callout = 0;
int escape;
size_t length_prevgroup = 0;
size_t item_hwm_offset = 0;
register uint32_t c;
register PCRE2_UCHAR *code = *codeptr;
PCRE2_UCHAR *last_code = code;
@ -3425,6 +3435,7 @@ for (;; ptr++)
zeroreqcu = reqcu;
zeroreqcuflags = reqcuflags;
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
*code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY;
break;
@ -3471,6 +3482,7 @@ for (;; ptr++)
/* Handle a real character class. */
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
/* PCRE supports POSIX class stuff inside a class. Perl gives an error if
they are encountered at the top level, so we'll do that too. */
@ -4540,7 +4552,7 @@ for (;; ptr++)
{
register int i;
int len = (int)(code - previous);
size_t base_hwm_offset = save_hwm_offset;
size_t base_hwm_offset = item_hwm_offset;
PCRE2_UCHAR *bralink = NULL;
PCRE2_UCHAR *brazeroptr = NULL;
@ -4597,7 +4609,7 @@ for (;; ptr++)
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
{
*code = OP_END;
adjust_recurse(previous, 1, utf, cb, save_hwm_offset);
adjust_recurse(previous, 1, utf, cb, item_hwm_offset);
memmove(previous + 1, previous, CU2BYTES(len));
code++;
if (repeat_max == 0)
@ -4621,7 +4633,7 @@ for (;; ptr++)
{
int offset;
*code = OP_END;
adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, save_hwm_offset);
adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, item_hwm_offset);
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type;
@ -4879,7 +4891,7 @@ for (;; ptr++)
{
int nlen = (int)(code - bracode);
*code = OP_END;
adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, item_hwm_offset);
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE;
@ -5014,7 +5026,7 @@ for (;; ptr++)
else
{
*code = OP_END;
adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, item_hwm_offset);
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
@ -5190,7 +5202,6 @@ for (;; ptr++)
newoptions = options;
skipunits = 0;
bravalue = OP_CBRA;
save_hwm_offset = cb->hwm - cb->start_workspace;
reset_bracount = FALSE;
/* Deal with the extended parentheses; all are introduced by '?', and the
@ -6010,6 +6021,7 @@ for (;; ptr++)
{
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
*code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
PUT2INC(code, 0, index);
PUT2INC(code, 0, count);
@ -6123,6 +6135,7 @@ for (;; ptr++)
HANDLE_RECURSION:
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
called = cb->start_code;
/* When we are actually compiling, find the bracket that is being
@ -6324,7 +6337,11 @@ for (;; ptr++)
previous = NULL;
cb->iscondassert = FALSE;
}
else previous = code;
else
{
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
}
*code = bravalue;
tempcode = code;
@ -6574,9 +6591,6 @@ for (;; ptr++)
PCRE2_SPTR p;
uint32_t cf;
/* Normally save_hwm_offset is set when '(' is read */
save_hwm_offset = cb->hwm - cb->start_workspace;
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
@ -6644,6 +6658,7 @@ for (;; ptr++)
HANDLE_REFERENCE:
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
*code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF;
PUT2INC(code, 0, recno);
cb->backref_map |= (recno < 32)? (1 << recno) : 1;
@ -6673,6 +6688,7 @@ for (;; ptr++)
if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb))
goto FAILED;
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
*code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
*code++ = ptype;
*code++ = pdata;
@ -6721,6 +6737,7 @@ for (;; ptr++)
{
previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
item_hwm_offset = cb->hwm - cb->start_workspace;
*code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
}
}
@ -6755,6 +6772,7 @@ for (;; ptr++)
ONE_CHAR:
previous = code;
item_hwm_offset = cb->hwm - cb->start_workspace;
/* For caseless UTF mode, check whether this character has more than one
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
@ -6980,7 +6998,7 @@ for (;;)
if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstcu,
&branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc,
cond_depth, save_hwm_offset, cb, (lengthptr == NULL)? NULL : &length))
cond_depth, cb, (lengthptr == NULL)? NULL : &length))
{
*ptrptr = ptr;
return FALSE;
@ -7992,6 +8010,8 @@ if (cb.names_found > 0)
error, errorcode will be set non-zero, so we don't need to look at the result
of the function here. */
/* fprintf(stderr, "+++\n\nPASS TWO\n"); */
ptr = pattern + skipatstart;
code = (PCRE2_UCHAR *)codestart;
*code = OP_BRA;
@ -8026,6 +8046,13 @@ if (usedlength > length) errorcode = ERR23; else
#endif
}
/* In rare debugging situations we sometimes need to look at the compiled code
at this stage. */
#ifdef CALL_PRINTINT
pcre2_printint(re, stderr, TRUE);
#endif
/* Fill in any forward references that are required. There may be repeated
references; optimize for them, as searching a large regex takes time. The
test of errorcode inside the loop means that nothing is done if it is already
@ -8041,6 +8068,9 @@ if (cb.hwm > cb.start_workspace)
cb.hwm -= LINK_SIZE;
offset = GET(cb.hwm, 0);
recno = GET(codestart, offset);
/* fprintf(stderr, "+++offset=%d recno=%d\n", offset, recno); */
if (recno != prev_recno)
{
groupptr = PRIV(find_bracket)(codestart, utf, recno);

View File

@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. */
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */
/* Tables of operator names. The same 8-bit table is used for all code unit
@ -138,9 +139,9 @@ if ((c & 0xc0) != 0xc0)
else
{
int i;
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
int s = 6*a;
c = (c & utf8_table3[a]) << s;
c = (c & PRIV(utf8_table3)[a]) << s;
for (i = 1; i <= a; i++)
{
if ((ptr[i] & 0xc0) != 0x80)
@ -223,12 +224,11 @@ get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int i;
for (i = utt_size - 1; i >= 0; i--)
for (i = PRIV(utt_size) - 1; i >= 0; i--)
{
if (ptype == utt[i].type && pvalue == utt[i].value) break;
if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
}
return (i >= 0)? utt_names + utt[i].name_offset : "??";
return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
#else /* No UTF support */
(void)ptype;
(void)pvalue;
@ -266,7 +266,7 @@ if (code[1] != PT_CLIST)
else
{
const char *not = (*code == OP_PROP)? "" : "not ";
const uint32_t *p = ucd_caseless_sets + code[2];
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
fprintf (f, "%s%sclist", before, not);
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
fprintf(f, "%s", after);
@ -286,7 +286,7 @@ bytecode can be written that do not depend on the value of LINK_SIZE.
Arguments:
re a compiled pattern
f the file to write to
print_lenghts show various lengths
print_lengths show various lengths
Returns: nothing
*/

3
testdata/testinput1 vendored
View File

@ -5721,4 +5721,7 @@ name)/mark
/A[\8]B[\9]C/
A8B9C
/(?1)()((((((\1++))\x85)+)|))/
\x85\x85
# End of testinput1

4
testdata/testinput2 vendored
View File

@ -4294,6 +4294,8 @@ a random value. /Ix
/.((?3)(?R)()(?2)|\1|$)()/B
/(?1)()((((((\1++))\x85)+)|))/
/(\9*+(?2);\3++()2|)++{/
/\V\x85\9*+((?2)\3++()2)*:2/
# End of testinput2

2
testdata/testinput8 vendored
View File

@ -146,4 +146,6 @@
/.((?3)(?R)()(?2)|\1|$)()/
/(?1)()((((((\1++))\x85)+)|))/
# End of testinput8

11
testdata/testoutput1 vendored
View File

@ -9447,4 +9447,15 @@ No match
A8B9C
0: A8B9C
/(?1)()((((((\1++))\x85)+)|))/
\x85\x85
0: \x85\x85
1:
2: \x85\x85
3: \x85\x85
4: \x85\x85
5: \x85
6:
7:
# End of testinput1

View File

@ -14391,6 +14391,10 @@ Failed: error 115 at offset 7: reference to non-existent subpattern
End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
/(\9*+(?2);\3++()2|)++{/
Failed: error 115 at offset 22: reference to non-existent subpattern
/\V\x85\9*+((?2)\3++()2)*:2/
Failed: error 115 at offset 26: reference to non-existent subpattern
# End of testinput2

View File

@ -813,4 +813,31 @@ Memory allocation (code space): 14
37 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
------------------------------------------------------------------
0 50 Bra
2 4 Recurse
4 3 CBra 1
7 3 Ket
9 39 CBra 2
12 32 CBra 3
15 27 CBra 4
18 22 CBra 5
21 15 CBra 6
24 10 CBra 7
27 5 Once
29 \1+
32 5 Ket
34 10 Ket
36 15 Ket
38 \x{85}
40 22 KetRmax
42 27 Ket
44 2 Alt
46 34 Ket
48 39 Ket
50 50 Ket
52 End
------------------------------------------------------------------
# End of testinput8

View File

@ -813,4 +813,31 @@ Memory allocation (code space): 28
37 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
------------------------------------------------------------------
0 50 Bra
2 4 Recurse
4 3 CBra 1
7 3 Ket
9 39 CBra 2
12 32 CBra 3
15 27 CBra 4
18 22 CBra 5
21 15 CBra 6
24 10 CBra 7
27 5 Once
29 \1+
32 5 Ket
34 10 Ket
36 15 Ket
38 \x{85}
40 22 KetRmax
42 27 Ket
44 2 Alt
46 34 Ket
48 39 Ket
50 50 Ket
52 End
------------------------------------------------------------------
# End of testinput8

View File

@ -813,4 +813,31 @@ Memory allocation (code space): 10
56 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
------------------------------------------------------------------
0 77 Bra
3 6 Recurse
6 5 CBra 1
11 5 Ket
14 60 CBra 2
19 49 CBra 3
24 41 CBra 4
29 33 CBra 5
34 23 CBra 6
39 15 CBra 7
44 7 Once
47 \1+
51 7 Ket
54 15 Ket
57 23 Ket
60 \x{85}
62 33 KetRmax
65 41 Ket
68 3 Alt
71 52 Ket
74 60 Ket
77 77 Ket
80 End
------------------------------------------------------------------
# End of testinput8