From d5191510db45c742d8536e206eaf7c26ea7303ed Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 3 May 2015 16:46:56 +0000 Subject: [PATCH] Fix recursive forward reference bug. --- ChangeLog | 6 ++++++ src/pcre2_compile.c | 44 +++++++++++++++++++++++------------------ testdata/testinput2 | 4 ++++ testdata/testinput8 | 4 ++++ testdata/testoutput2 | 43 ++++++++++++++++++++++++++++++++++++++++ testdata/testoutput8-16 | 43 ++++++++++++++++++++++++++++++++++++++++ testdata/testoutput8-32 | 43 ++++++++++++++++++++++++++++++++++++++++ testdata/testoutput8-8 | 43 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 211 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 52645fc..7a29394 100644 --- a/ChangeLog +++ b/ChangeLog @@ -100,6 +100,12 @@ behaviour. 25. Static linking against the PCRE2 library using the pkg-config module was failing on missing pthread symbols. +26. If a group that contained a recursive back reference also contained a +forward reference subroutine call followed by a non-forward-reference +subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to +compile correct code, leading to undefined behaviour or an internally detected +error. This bug was discovered by the LLVM fuzzer. + Version 10.10 06-March-2015 --------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 582ca7f..2955432 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2605,11 +2605,12 @@ have their offsets adjusted. That is one of the jobs of this function. Before it is called, the partially compiled regex must be temporarily terminated with OP_END. -This function has been extended with the possibility of forward references for -recursions and subroutine calls. It must also check the list of such references -for the group we are dealing with. If it finds that one of the recursions in -the current group is on this list, it adjusts the offset in the list, not the -value in the reference (which is a group number). +This function has been extended to cope with forward references for recursions +and subroutine calls. It must check the list of such references for the +group we are dealing with. If it finds that one of the recursions in the +current group is on this list, it does not adjust the value in the reference +(which is a group number). After the group has been scanned, all the offsets in +the forward reference list for the group are adjusted. Arguments: group points to the start of the group @@ -2625,29 +2626,24 @@ static void adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb, size_t save_hwm_offset) { +uint32_t offset; +PCRE2_UCHAR *hc; PCRE2_UCHAR *ptr = group; +/* Scan the group for recursions. For each one found, check the forward +reference list. */ + while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL) { - int offset; - PCRE2_UCHAR *hc; - - /* See if this recursion is on the forward reference list. If so, adjust the - reference. */ - for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm; hc += LINK_SIZE) { offset = (int)GET(hc, 0); - if (cb->start_code + offset == ptr + 1) - { - PUT(hc, 0, offset + adjust); - break; - } + if (cb->start_code + offset == ptr + 1) break; } - /* Otherwise, adjust the recursion offset if it's after the start of this - group. */ + /* If we have not found this recursion on the forward reference list, adjust + the recursion's offset if it's after the start of this group. */ if (hc >= cb->hwm) { @@ -2657,6 +2653,15 @@ while ((ptr = (PCRE2_UCHAR *)find_recurse(ptr, utf)) != NULL) ptr += 1 + LINK_SIZE; } + +/* Now adjust all forward reference offsets for the group. */ + +for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm; + hc += LINK_SIZE) + { + offset = (int)GET(hc, 0); + PUT(hc, 0, offset + adjust); + } } @@ -7111,7 +7116,8 @@ for (;;) /* If it was a capturing subpattern, check to see if it contained any recursive back references. If so, we must wrap it in atomic brackets. Because we are moving code along, we must ensure that any pending recursive - references are updated. In any event, remove the block from the chain. */ + or forward subroutine references are updated. In any event, remove the + block from the chain. */ if (capnumber > 0) { diff --git a/testdata/testinput2 b/testdata/testinput2 index f7e9191..857c065 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4290,4 +4290,8 @@ a random value. /Ix /A\8B\9C/ A8B9C +/.((?2)(?R)|\1|$)()/B + +/.((?3)(?R)()(?2)|\1|$)()/B + # End of testinput2 diff --git a/testdata/testinput8 b/testdata/testinput8 index 88928b9..e7f957d 100644 --- a/testdata/testinput8 +++ b/testdata/testinput8 @@ -142,4 +142,8 @@ "(?1)(?#?'){2}(a)" +/.((?2)(?R)|\1|$)()/ + +/.((?3)(?R)()(?2)|\1|$)()/ + # End of testinput8 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 47cf1cb..3a0dd0c 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14348,4 +14348,47 @@ Failed: error 115 at offset 3: reference to non-existent subpattern Failed: error 115 at offset 7: reference to non-existent subpattern A8B9C +/.((?2)(?R)|\1|$)()/B +------------------------------------------------------------------ + Bra + Any + Once + CBra 1 + Recurse + Recurse + Alt + \1 + Alt + $ + Ket + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/B +------------------------------------------------------------------ + Bra + Any + Once + CBra 1 + Recurse + Recurse + CBra 2 + Ket + Recurse + Alt + \1 + Alt + $ + Ket + Ket + CBra 3 + Ket + Ket + End +------------------------------------------------------------------ + # End of testinput2 diff --git a/testdata/testoutput8-16 b/testdata/testoutput8-16 index a4ea173..2a8bb76 100644 --- a/testdata/testoutput8-16 +++ b/testdata/testoutput8-16 @@ -770,4 +770,47 @@ Memory allocation (code space): 14 23 End ------------------------------------------------------------------ +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 28 Bra + 2 Any + 3 18 Once + 5 7 CBra 1 + 8 23 Recurse + 10 0 Recurse + 12 4 Alt + 14 \1 + 16 3 Alt + 18 $ + 19 14 Ket + 21 18 Ket + 23 3 CBra 2 + 26 3 Ket + 28 28 Ket + 30 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 35 Bra + 2 Any + 3 25 Once + 5 14 CBra 1 + 8 30 Recurse + 10 0 Recurse + 12 3 CBra 2 + 15 3 Ket + 17 12 Recurse + 19 4 Alt + 21 \1 + 23 3 Alt + 25 $ + 26 21 Ket + 28 25 Ket + 30 3 CBra 3 + 33 3 Ket + 35 35 Ket + 37 End +------------------------------------------------------------------ + # End of testinput8 diff --git a/testdata/testoutput8-32 b/testdata/testoutput8-32 index 579e1b1..41fa01d 100644 --- a/testdata/testoutput8-32 +++ b/testdata/testoutput8-32 @@ -770,4 +770,47 @@ Memory allocation (code space): 28 23 End ------------------------------------------------------------------ +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 28 Bra + 2 Any + 3 18 Once + 5 7 CBra 1 + 8 23 Recurse + 10 0 Recurse + 12 4 Alt + 14 \1 + 16 3 Alt + 18 $ + 19 14 Ket + 21 18 Ket + 23 3 CBra 2 + 26 3 Ket + 28 28 Ket + 30 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 35 Bra + 2 Any + 3 25 Once + 5 14 CBra 1 + 8 30 Recurse + 10 0 Recurse + 12 3 CBra 2 + 15 3 Ket + 17 12 Recurse + 19 4 Alt + 21 \1 + 23 3 Alt + 25 $ + 26 21 Ket + 28 25 Ket + 30 3 CBra 3 + 33 3 Ket + 35 35 Ket + 37 End +------------------------------------------------------------------ + # End of testinput8 diff --git a/testdata/testoutput8-8 b/testdata/testoutput8-8 index 7afa8d8..15bc3de 100644 --- a/testdata/testoutput8-8 +++ b/testdata/testoutput8-8 @@ -770,4 +770,47 @@ Memory allocation (code space): 10 34 End ------------------------------------------------------------------ +/.((?2)(?R)|\1|$)()/ +------------------------------------------------------------------ + 0 42 Bra + 3 Any + 4 27 Once + 7 11 CBra 1 + 12 34 Recurse + 15 0 Recurse + 18 6 Alt + 21 \1 + 24 4 Alt + 27 $ + 28 21 Ket + 31 27 Ket + 34 5 CBra 2 + 39 5 Ket + 42 42 Ket + 45 End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/ +------------------------------------------------------------------ + 0 53 Bra + 3 Any + 4 38 Once + 7 22 CBra 1 + 12 45 Recurse + 15 0 Recurse + 18 5 CBra 2 + 23 5 Ket + 26 18 Recurse + 29 6 Alt + 32 \1 + 35 4 Alt + 38 $ + 39 32 Ket + 42 38 Ket + 45 5 CBra 3 + 50 5 Ket + 53 53 Ket + 56 End +------------------------------------------------------------------ + # End of testinput8