Fix recursive forward reference bug.
This commit is contained in:
parent
dc8d444147
commit
d5191510db
|
@ -100,6 +100,12 @@ behaviour.
|
|||
25. Static linking against the PCRE2 library using the pkg-config module was
|
||||
failing on missing pthread symbols.
|
||||
|
||||
26. If a group that contained a recursive back reference also contained a
|
||||
forward reference subroutine call followed by a non-forward-reference
|
||||
subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to
|
||||
compile correct code, leading to undefined behaviour or an internally detected
|
||||
error. This bug was discovered by the LLVM fuzzer.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
|
|
@ -2605,11 +2605,12 @@ have their offsets adjusted. That is one of the jobs of this function. Before
|
|||
it is called, the partially compiled regex must be temporarily terminated with
|
||||
OP_END.
|
||||
|
||||
This function has been extended with the possibility of forward references for
|
||||
recursions and subroutine calls. It must also check the list of such references
|
||||
for the group we are dealing with. If it finds that one of the recursions in
|
||||
the current group is on this list, it adjusts the offset in the list, not the
|
||||
value in the reference (which is a group number).
|
||||
This function has been extended to cope with forward references for recursions
|
||||
and subroutine calls. It must check the list of such references for the
|
||||
group we are dealing with. If it finds that one of the recursions in the
|
||||
current group is on this list, it does not adjust the value in the reference
|
||||
(which is a group number). After the group has been scanned, all the offsets in
|
||||
the forward reference list for the group are adjusted.
|
||||
|
||||
Arguments:
|
||||
group points to the start of the group
|
||||
|
@ -2625,29 +2626,24 @@ static void
|
|||
adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb,
|
||||
size_t save_hwm_offset)
|
||||
{
|
||||
uint32_t offset;
|
||||
PCRE2_UCHAR *hc;
|
||||
PCRE2_UCHAR *ptr = group;
|
||||
|
||||
/* Scan the group for recursions. For each one found, check the forward
|
||||
reference list. */
|
||||
|
||||
while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
|
||||
{
|
||||
int offset;
|
||||
PCRE2_UCHAR *hc;
|
||||
|
||||
/* See if this recursion is on the forward reference list. If so, adjust the
|
||||
reference. */
|
||||
|
||||
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
|
||||
hc += LINK_SIZE)
|
||||
{
|
||||
offset = (int)GET(hc, 0);
|
||||
if (cb->start_code + offset == ptr + 1)
|
||||
{
|
||||
PUT(hc, 0, offset + adjust);
|
||||
break;
|
||||
}
|
||||
if (cb->start_code + offset == ptr + 1) break;
|
||||
}
|
||||
|
||||
/* Otherwise, adjust the recursion offset if it's after the start of this
|
||||
group. */
|
||||
/* If we have not found this recursion on the forward reference list, adjust
|
||||
the recursion's offset if it's after the start of this group. */
|
||||
|
||||
if (hc >= cb->hwm)
|
||||
{
|
||||
|
@ -2657,6 +2653,15 @@ while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
|
|||
|
||||
ptr += 1 + LINK_SIZE;
|
||||
}
|
||||
|
||||
/* Now adjust all forward reference offsets for the group. */
|
||||
|
||||
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
|
||||
hc += LINK_SIZE)
|
||||
{
|
||||
offset = (int)GET(hc, 0);
|
||||
PUT(hc, 0, offset + adjust);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -7111,7 +7116,8 @@ for (;;)
|
|||
/* If it was a capturing subpattern, check to see if it contained any
|
||||
recursive back references. If so, we must wrap it in atomic brackets.
|
||||
Because we are moving code along, we must ensure that any pending recursive
|
||||
references are updated. In any event, remove the block from the chain. */
|
||||
or forward subroutine references are updated. In any event, remove the
|
||||
block from the chain. */
|
||||
|
||||
if (capnumber > 0)
|
||||
{
|
||||
|
|
|
@ -4290,4 +4290,8 @@ a random value. /Ix
|
|||
/A\8B\9C/
|
||||
A8B9C
|
||||
|
||||
/.((?2)(?R)|\1|$)()/B
|
||||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/B
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -142,4 +142,8 @@
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/
|
||||
|
||||
# End of testinput8
|
||||
|
|
|
@ -14348,4 +14348,47 @@ Failed: error 115 at offset 3: reference to non-existent subpattern
|
|||
Failed: error 115 at offset 7: reference to non-existent subpattern
|
||||
A8B9C
|
||||
|
||||
/.((?2)(?R)|\1|$)()/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Any
|
||||
Once
|
||||
CBra 1
|
||||
Recurse
|
||||
Recurse
|
||||
Alt
|
||||
\1
|
||||
Alt
|
||||
$
|
||||
Ket
|
||||
Ket
|
||||
CBra 2
|
||||
Ket
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Any
|
||||
Once
|
||||
CBra 1
|
||||
Recurse
|
||||
Recurse
|
||||
CBra 2
|
||||
Ket
|
||||
Recurse
|
||||
Alt
|
||||
\1
|
||||
Alt
|
||||
$
|
||||
Ket
|
||||
Ket
|
||||
CBra 3
|
||||
Ket
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -770,4 +770,47 @@ Memory allocation (code space): 14
|
|||
23 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
------------------------------------------------------------------
|
||||
0 28 Bra
|
||||
2 Any
|
||||
3 18 Once
|
||||
5 7 CBra 1
|
||||
8 23 Recurse
|
||||
10 0 Recurse
|
||||
12 4 Alt
|
||||
14 \1
|
||||
16 3 Alt
|
||||
18 $
|
||||
19 14 Ket
|
||||
21 18 Ket
|
||||
23 3 CBra 2
|
||||
26 3 Ket
|
||||
28 28 Ket
|
||||
30 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/
|
||||
------------------------------------------------------------------
|
||||
0 35 Bra
|
||||
2 Any
|
||||
3 25 Once
|
||||
5 14 CBra 1
|
||||
8 30 Recurse
|
||||
10 0 Recurse
|
||||
12 3 CBra 2
|
||||
15 3 Ket
|
||||
17 12 Recurse
|
||||
19 4 Alt
|
||||
21 \1
|
||||
23 3 Alt
|
||||
25 $
|
||||
26 21 Ket
|
||||
28 25 Ket
|
||||
30 3 CBra 3
|
||||
33 3 Ket
|
||||
35 35 Ket
|
||||
37 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
||||
|
|
|
@ -770,4 +770,47 @@ Memory allocation (code space): 28
|
|||
23 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
------------------------------------------------------------------
|
||||
0 28 Bra
|
||||
2 Any
|
||||
3 18 Once
|
||||
5 7 CBra 1
|
||||
8 23 Recurse
|
||||
10 0 Recurse
|
||||
12 4 Alt
|
||||
14 \1
|
||||
16 3 Alt
|
||||
18 $
|
||||
19 14 Ket
|
||||
21 18 Ket
|
||||
23 3 CBra 2
|
||||
26 3 Ket
|
||||
28 28 Ket
|
||||
30 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/
|
||||
------------------------------------------------------------------
|
||||
0 35 Bra
|
||||
2 Any
|
||||
3 25 Once
|
||||
5 14 CBra 1
|
||||
8 30 Recurse
|
||||
10 0 Recurse
|
||||
12 3 CBra 2
|
||||
15 3 Ket
|
||||
17 12 Recurse
|
||||
19 4 Alt
|
||||
21 \1
|
||||
23 3 Alt
|
||||
25 $
|
||||
26 21 Ket
|
||||
28 25 Ket
|
||||
30 3 CBra 3
|
||||
33 3 Ket
|
||||
35 35 Ket
|
||||
37 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
||||
|
|
|
@ -770,4 +770,47 @@ Memory allocation (code space): 10
|
|||
34 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
------------------------------------------------------------------
|
||||
0 42 Bra
|
||||
3 Any
|
||||
4 27 Once
|
||||
7 11 CBra 1
|
||||
12 34 Recurse
|
||||
15 0 Recurse
|
||||
18 6 Alt
|
||||
21 \1
|
||||
24 4 Alt
|
||||
27 $
|
||||
28 21 Ket
|
||||
31 27 Ket
|
||||
34 5 CBra 2
|
||||
39 5 Ket
|
||||
42 42 Ket
|
||||
45 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?3)(?R)()(?2)|\1|$)()/
|
||||
------------------------------------------------------------------
|
||||
0 53 Bra
|
||||
3 Any
|
||||
4 38 Once
|
||||
7 22 CBra 1
|
||||
12 45 Recurse
|
||||
15 0 Recurse
|
||||
18 5 CBra 2
|
||||
23 5 Ket
|
||||
26 18 Recurse
|
||||
29 6 Alt
|
||||
32 \1
|
||||
35 4 Alt
|
||||
38 $
|
||||
39 32 Ket
|
||||
42 38 Ket
|
||||
45 5 CBra 3
|
||||
50 5 Ket
|
||||
53 53 Ket
|
||||
56 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
||||
|
|
Loading…
Reference in New Issue