Fix recursive forward reference bug.
This commit is contained in:
parent
dc8d444147
commit
d5191510db
|
@ -100,6 +100,12 @@ behaviour.
|
||||||
25. Static linking against the PCRE2 library using the pkg-config module was
|
25. Static linking against the PCRE2 library using the pkg-config module was
|
||||||
failing on missing pthread symbols.
|
failing on missing pthread symbols.
|
||||||
|
|
||||||
|
26. If a group that contained a recursive back reference also contained a
|
||||||
|
forward reference subroutine call followed by a non-forward-reference
|
||||||
|
subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to
|
||||||
|
compile correct code, leading to undefined behaviour or an internally detected
|
||||||
|
error. This bug was discovered by the LLVM fuzzer.
|
||||||
|
|
||||||
|
|
||||||
Version 10.10 06-March-2015
|
Version 10.10 06-March-2015
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
|
@ -2605,11 +2605,12 @@ have their offsets adjusted. That is one of the jobs of this function. Before
|
||||||
it is called, the partially compiled regex must be temporarily terminated with
|
it is called, the partially compiled regex must be temporarily terminated with
|
||||||
OP_END.
|
OP_END.
|
||||||
|
|
||||||
This function has been extended with the possibility of forward references for
|
This function has been extended to cope with forward references for recursions
|
||||||
recursions and subroutine calls. It must also check the list of such references
|
and subroutine calls. It must check the list of such references for the
|
||||||
for the group we are dealing with. If it finds that one of the recursions in
|
group we are dealing with. If it finds that one of the recursions in the
|
||||||
the current group is on this list, it adjusts the offset in the list, not the
|
current group is on this list, it does not adjust the value in the reference
|
||||||
value in the reference (which is a group number).
|
(which is a group number). After the group has been scanned, all the offsets in
|
||||||
|
the forward reference list for the group are adjusted.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
group points to the start of the group
|
group points to the start of the group
|
||||||
|
@ -2625,29 +2626,24 @@ static void
|
||||||
adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb,
|
adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb,
|
||||||
size_t save_hwm_offset)
|
size_t save_hwm_offset)
|
||||||
{
|
{
|
||||||
|
uint32_t offset;
|
||||||
|
PCRE2_UCHAR *hc;
|
||||||
PCRE2_UCHAR *ptr = group;
|
PCRE2_UCHAR *ptr = group;
|
||||||
|
|
||||||
|
/* Scan the group for recursions. For each one found, check the forward
|
||||||
|
reference list. */
|
||||||
|
|
||||||
while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
|
while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
|
||||||
{
|
{
|
||||||
int offset;
|
|
||||||
PCRE2_UCHAR *hc;
|
|
||||||
|
|
||||||
/* See if this recursion is on the forward reference list. If so, adjust the
|
|
||||||
reference. */
|
|
||||||
|
|
||||||
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
|
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
|
||||||
hc += LINK_SIZE)
|
hc += LINK_SIZE)
|
||||||
{
|
{
|
||||||
offset = (int)GET(hc, 0);
|
offset = (int)GET(hc, 0);
|
||||||
if (cb->start_code + offset == ptr + 1)
|
if (cb->start_code + offset == ptr + 1) break;
|
||||||
{
|
|
||||||
PUT(hc, 0, offset + adjust);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Otherwise, adjust the recursion offset if it's after the start of this
|
/* If we have not found this recursion on the forward reference list, adjust
|
||||||
group. */
|
the recursion's offset if it's after the start of this group. */
|
||||||
|
|
||||||
if (hc >= cb->hwm)
|
if (hc >= cb->hwm)
|
||||||
{
|
{
|
||||||
|
@ -2657,6 +2653,15 @@ while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL)
|
||||||
|
|
||||||
ptr += 1 + LINK_SIZE;
|
ptr += 1 + LINK_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now adjust all forward reference offsets for the group. */
|
||||||
|
|
||||||
|
for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
|
||||||
|
hc += LINK_SIZE)
|
||||||
|
{
|
||||||
|
offset = (int)GET(hc, 0);
|
||||||
|
PUT(hc, 0, offset + adjust);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -7111,7 +7116,8 @@ for (;;)
|
||||||
/* If it was a capturing subpattern, check to see if it contained any
|
/* If it was a capturing subpattern, check to see if it contained any
|
||||||
recursive back references. If so, we must wrap it in atomic brackets.
|
recursive back references. If so, we must wrap it in atomic brackets.
|
||||||
Because we are moving code along, we must ensure that any pending recursive
|
Because we are moving code along, we must ensure that any pending recursive
|
||||||
references are updated. In any event, remove the block from the chain. */
|
or forward subroutine references are updated. In any event, remove the
|
||||||
|
block from the chain. */
|
||||||
|
|
||||||
if (capnumber > 0)
|
if (capnumber > 0)
|
||||||
{
|
{
|
||||||
|
|
|
@ -4290,4 +4290,8 @@ a random value. /Ix
|
||||||
/A\8B\9C/
|
/A\8B\9C/
|
||||||
A8B9C
|
A8B9C
|
||||||
|
|
||||||
|
/.((?2)(?R)|\1|$)()/B
|
||||||
|
|
||||||
|
/.((?3)(?R)()(?2)|\1|$)()/B
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -142,4 +142,8 @@
|
||||||
|
|
||||||
"(?1)(?#?'){2}(a)"
|
"(?1)(?#?'){2}(a)"
|
||||||
|
|
||||||
|
/.((?2)(?R)|\1|$)()/
|
||||||
|
|
||||||
|
/.((?3)(?R)()(?2)|\1|$)()/
|
||||||
|
|
||||||
# End of testinput8
|
# End of testinput8
|
||||||
|
|
|
@ -14348,4 +14348,47 @@ Failed: error 115 at offset 3: reference to non-existent subpattern
|
||||||
Failed: error 115 at offset 7: reference to non-existent subpattern
|
Failed: error 115 at offset 7: reference to non-existent subpattern
|
||||||
A8B9C
|
A8B9C
|
||||||
|
|
||||||
|
/.((?2)(?R)|\1|$)()/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Any
|
||||||
|
Once
|
||||||
|
CBra 1
|
||||||
|
Recurse
|
||||||
|
Recurse
|
||||||
|
Alt
|
||||||
|
\1
|
||||||
|
Alt
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
Ket
|
||||||
|
CBra 2
|
||||||
|
Ket
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/.((?3)(?R)()(?2)|\1|$)()/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Any
|
||||||
|
Once
|
||||||
|
CBra 1
|
||||||
|
Recurse
|
||||||
|
Recurse
|
||||||
|
CBra 2
|
||||||
|
Ket
|
||||||
|
Recurse
|
||||||
|
Alt
|
||||||
|
\1
|
||||||
|
Alt
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
Ket
|
||||||
|
CBra 3
|
||||||
|
Ket
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -770,4 +770,47 @@ Memory allocation (code space): 14
|
||||||
23 End
|
23 End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/.((?2)(?R)|\1|$)()/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 28 Bra
|
||||||
|
2 Any
|
||||||
|
3 18 Once
|
||||||
|
5 7 CBra 1
|
||||||
|
8 23 Recurse
|
||||||
|
10 0 Recurse
|
||||||
|
12 4 Alt
|
||||||
|
14 \1
|
||||||
|
16 3 Alt
|
||||||
|
18 $
|
||||||
|
19 14 Ket
|
||||||
|
21 18 Ket
|
||||||
|
23 3 CBra 2
|
||||||
|
26 3 Ket
|
||||||
|
28 28 Ket
|
||||||
|
30 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/.((?3)(?R)()(?2)|\1|$)()/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 35 Bra
|
||||||
|
2 Any
|
||||||
|
3 25 Once
|
||||||
|
5 14 CBra 1
|
||||||
|
8 30 Recurse
|
||||||
|
10 0 Recurse
|
||||||
|
12 3 CBra 2
|
||||||
|
15 3 Ket
|
||||||
|
17 12 Recurse
|
||||||
|
19 4 Alt
|
||||||
|
21 \1
|
||||||
|
23 3 Alt
|
||||||
|
25 $
|
||||||
|
26 21 Ket
|
||||||
|
28 25 Ket
|
||||||
|
30 3 CBra 3
|
||||||
|
33 3 Ket
|
||||||
|
35 35 Ket
|
||||||
|
37 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput8
|
# End of testinput8
|
||||||
|
|
|
@ -770,4 +770,47 @@ Memory allocation (code space): 28
|
||||||
23 End
|
23 End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/.((?2)(?R)|\1|$)()/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 28 Bra
|
||||||
|
2 Any
|
||||||
|
3 18 Once
|
||||||
|
5 7 CBra 1
|
||||||
|
8 23 Recurse
|
||||||
|
10 0 Recurse
|
||||||
|
12 4 Alt
|
||||||
|
14 \1
|
||||||
|
16 3 Alt
|
||||||
|
18 $
|
||||||
|
19 14 Ket
|
||||||
|
21 18 Ket
|
||||||
|
23 3 CBra 2
|
||||||
|
26 3 Ket
|
||||||
|
28 28 Ket
|
||||||
|
30 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/.((?3)(?R)()(?2)|\1|$)()/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 35 Bra
|
||||||
|
2 Any
|
||||||
|
3 25 Once
|
||||||
|
5 14 CBra 1
|
||||||
|
8 30 Recurse
|
||||||
|
10 0 Recurse
|
||||||
|
12 3 CBra 2
|
||||||
|
15 3 Ket
|
||||||
|
17 12 Recurse
|
||||||
|
19 4 Alt
|
||||||
|
21 \1
|
||||||
|
23 3 Alt
|
||||||
|
25 $
|
||||||
|
26 21 Ket
|
||||||
|
28 25 Ket
|
||||||
|
30 3 CBra 3
|
||||||
|
33 3 Ket
|
||||||
|
35 35 Ket
|
||||||
|
37 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput8
|
# End of testinput8
|
||||||
|
|
|
@ -770,4 +770,47 @@ Memory allocation (code space): 10
|
||||||
34 End
|
34 End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/.((?2)(?R)|\1|$)()/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 42 Bra
|
||||||
|
3 Any
|
||||||
|
4 27 Once
|
||||||
|
7 11 CBra 1
|
||||||
|
12 34 Recurse
|
||||||
|
15 0 Recurse
|
||||||
|
18 6 Alt
|
||||||
|
21 \1
|
||||||
|
24 4 Alt
|
||||||
|
27 $
|
||||||
|
28 21 Ket
|
||||||
|
31 27 Ket
|
||||||
|
34 5 CBra 2
|
||||||
|
39 5 Ket
|
||||||
|
42 42 Ket
|
||||||
|
45 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/.((?3)(?R)()(?2)|\1|$)()/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 53 Bra
|
||||||
|
3 Any
|
||||||
|
4 38 Once
|
||||||
|
7 22 CBra 1
|
||||||
|
12 45 Recurse
|
||||||
|
15 0 Recurse
|
||||||
|
18 5 CBra 2
|
||||||
|
23 5 Ket
|
||||||
|
26 18 Recurse
|
||||||
|
29 6 Alt
|
||||||
|
32 \1
|
||||||
|
35 4 Alt
|
||||||
|
38 $
|
||||||
|
39 32 Ket
|
||||||
|
42 38 Ket
|
||||||
|
45 5 CBra 3
|
||||||
|
50 5 Ket
|
||||||
|
53 53 Ket
|
||||||
|
56 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput8
|
# End of testinput8
|
||||||
|
|
Loading…
Reference in New Issue