Fix recursive forward reference bug.

This commit is contained in:
Philip.Hazel 2015-05-03 16:46:56 +00:00
parent dc8d444147
commit d5191510db
8 changed files with 211 additions and 19 deletions

View File

@ -100,6 +100,12 @@ behaviour.
25. Static linking against the PCRE2 library using the pkg-config module was
failing on missing pthread symbols.
26. If a group that contained a recursive back reference also contained a
forward reference subroutine call followed by a non-forward-reference
subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() generated
incorrect compiled code, leading to undefined behaviour or an internally
detected error. This bug was discovered by the LLVM fuzzer.
Version 10.10 06-March-2015
---------------------------

View File

@ -2605,11 +2605,12 @@ have their offsets adjusted. That is one of the jobs of this function. Before
it is called, the partially compiled regex must be temporarily terminated with
OP_END.
This function has been extended with the possibility of forward references for
recursions and subroutine calls. It must also check the list of such references
for the group we are dealing with. If it finds that one of the recursions in
the current group is on this list, it adjusts the offset in the list, not the
value in the reference (which is a group number).
This function has been extended to cope with forward references for recursions
and subroutine calls. It must check the list of such references for the
group we are dealing with. If it finds that one of the recursions in the
current group is on this list, it does not adjust the value in the reference
(which is a group number). After the group has been scanned, all the offsets in
the forward reference list for the group are adjusted.
Arguments:
group points to the start of the group
@ -2625,29 +2626,24 @@ static void
adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb,
size_t save_hwm_offset)
{
uint32_t offset;
PCRE2_UCHAR *hc;
PCRE2_UCHAR *ptr = group;
/* Scan the group for recursions. For each one found, check the forward
reference list. */
while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
{
int offset;
PCRE2_UCHAR *hc;
/* See if this recursion is on the forward reference list. If so, adjust the
reference. */
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
hc += LINK_SIZE)
{
offset = (int)GET(hc, 0);
if (cb->start_code + offset == ptr + 1)
{
PUT(hc, 0, offset + adjust);
break;
}
if (cb->start_code + offset == ptr + 1) break;
}
/* Otherwise, adjust the recursion offset if it's after the start of this
group. */
/* If we have not found this recursion on the forward reference list, adjust
the recursion's offset if it's after the start of this group. */
if (hc >= cb->hwm)
{
@ -2657,6 +2653,15 @@ while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
ptr += 1 + LINK_SIZE;
}
/* Now adjust all forward reference offsets for the group. */
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
hc += LINK_SIZE)
{
offset = (int)GET(hc, 0);
PUT(hc, 0, offset + adjust);
}
}
@ -7111,7 +7116,8 @@ for (;;)
/* If it was a capturing subpattern, check to see if it contained any
recursive back references. If so, we must wrap it in atomic brackets.
Because we are moving code along, we must ensure that any pending recursive
references are updated. In any event, remove the block from the chain. */
or forward subroutine references are updated. In any event, remove the
block from the chain. */
if (capnumber > 0)
{

4
testdata/testinput2 vendored
View File

@ -4290,4 +4290,8 @@ a random value. /Ix
/A\8B\9C/
A8B9C
/.((?2)(?R)|\1|$)()/B
/.((?3)(?R)()(?2)|\1|$)()/B
# End of testinput2

4
testdata/testinput8 vendored
View File

@ -142,4 +142,8 @@
"(?1)(?#?'){2}(a)"
/.((?2)(?R)|\1|$)()/
/.((?3)(?R)()(?2)|\1|$)()/
# End of testinput8

43
testdata/testoutput2 vendored
View File

@ -14348,4 +14348,47 @@ Failed: error 115 at offset 3: reference to non-existent subpattern
Failed: error 115 at offset 7: reference to non-existent subpattern
A8B9C
/.((?2)(?R)|\1|$)()/B
------------------------------------------------------------------
Bra
Any
Once
CBra 1
Recurse
Recurse
Alt
\1
Alt
$
Ket
Ket
CBra 2
Ket
Ket
End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/B
------------------------------------------------------------------
Bra
Any
Once
CBra 1
Recurse
Recurse
CBra 2
Ket
Recurse
Alt
\1
Alt
$
Ket
Ket
CBra 3
Ket
Ket
End
------------------------------------------------------------------
# End of testinput2

View File

@ -770,4 +770,47 @@ Memory allocation (code space): 14
23 End
------------------------------------------------------------------
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
0 28 Bra
2 Any
3 18 Once
5 7 CBra 1
8 23 Recurse
10 0 Recurse
12 4 Alt
14 \1
16 3 Alt
18 $
19 14 Ket
21 18 Ket
23 3 CBra 2
26 3 Ket
28 28 Ket
30 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
0 35 Bra
2 Any
3 25 Once
5 14 CBra 1
8 30 Recurse
10 0 Recurse
12 3 CBra 2
15 3 Ket
17 12 Recurse
19 4 Alt
21 \1
23 3 Alt
25 $
26 21 Ket
28 25 Ket
30 3 CBra 3
33 3 Ket
35 35 Ket
37 End
------------------------------------------------------------------
# End of testinput8

View File

@ -770,4 +770,47 @@ Memory allocation (code space): 28
23 End
------------------------------------------------------------------
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
0 28 Bra
2 Any
3 18 Once
5 7 CBra 1
8 23 Recurse
10 0 Recurse
12 4 Alt
14 \1
16 3 Alt
18 $
19 14 Ket
21 18 Ket
23 3 CBra 2
26 3 Ket
28 28 Ket
30 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
0 35 Bra
2 Any
3 25 Once
5 14 CBra 1
8 30 Recurse
10 0 Recurse
12 3 CBra 2
15 3 Ket
17 12 Recurse
19 4 Alt
21 \1
23 3 Alt
25 $
26 21 Ket
28 25 Ket
30 3 CBra 3
33 3 Ket
35 35 Ket
37 End
------------------------------------------------------------------
# End of testinput8

View File

@ -770,4 +770,47 @@ Memory allocation (code space): 10
34 End
------------------------------------------------------------------
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
0 42 Bra
3 Any
4 27 Once
7 11 CBra 1
12 34 Recurse
15 0 Recurse
18 6 Alt
21 \1
24 4 Alt
27 $
28 21 Ket
31 27 Ket
34 5 CBra 2
39 5 Ket
42 42 Ket
45 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
0 53 Bra
3 Any
4 38 Once
7 22 CBra 1
12 45 Recurse
15 0 Recurse
18 5 CBra 2
23 5 Ket
26 18 Recurse
29 6 Alt
32 \1
35 4 Alt
38 $
39 32 Ket
42 38 Ket
45 5 CBra 3
50 5 Ket
53 53 Ket
56 End
------------------------------------------------------------------
# End of testinput8