Fix missing non-existent reference test and bad error offsets for bad \g and \k items.
This commit is contained in:
Philip.Hazel 2015-08-09 17:46:35 +00:00
parent e3d62b0ff8
commit 8e37b0b3c6
5 changed files with 39 additions and 23 deletions

View File

@ -119,6 +119,13 @@ recursions is postponed till match time. In the past, some easy ones were
detected at compile time. This re-writing was done in response to yet another detected at compile time. This re-writing was done in response to yet another
bug found by the LLVM fuzzer. bug found by the LLVM fuzzer.
31. A test for a back reference to a non-existent group was missing for items
such as \987. This caused incorrect code to be compiled. This issue was found
by Karl Skomski with a custom LLVM fuzzer.
32. Error messages for syntax errors following \g and \k were giving inaccurate
offsets in the pattern.
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -1841,9 +1841,8 @@ else
break; break;
} }
/* \1 to \9 are always back references. \8x and \9x are too, unless there /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
are an awful lot of previous captures; \1x to \7x are octal escapes if are octal escapes if there are not that many previous captures. */
there are not that many previous captures. */
if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount) if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount)
{ {
@ -6764,7 +6763,7 @@ for (;; ptr++)
if (*p != (PCRE2_UCHAR)terminator) if (*p != (PCRE2_UCHAR)terminator)
{ {
*errorcodeptr = ERR57; *errorcodeptr = ERR57;
break; goto FAILED;
} }
ptr++; ptr++;
goto HANDLE_NUMERICAL_RECURSION; goto HANDLE_NUMERICAL_RECURSION;
@ -6779,7 +6778,7 @@ for (;; ptr++)
ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET)) ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
{ {
*errorcodeptr = ERR69; *errorcodeptr = ERR69;
break; goto FAILED;
} }
is_recurse = FALSE; is_recurse = FALSE;
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
@ -6801,6 +6800,11 @@ for (;; ptr++)
single group (i.e. not to a duplicated name). */ single group (i.e. not to a duplicated name). */
HANDLE_REFERENCE: HANDLE_REFERENCE:
if (recno > (int)cb->final_bracount)
{
*errorcodeptr = ERR15;
goto FAILED;
}
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
previous = code; previous = code;
*code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF;

2
testdata/testinput2 vendored
View File

@ -4397,4 +4397,6 @@ a random value. /Ix
/0(?0)|(1)(*THEN)(*SKIP:0)(*FAIL)/ /0(?0)|(1)(*THEN)(*SKIP:0)(*FAIL)/
01 01
/(?(1)()\983040\2)/
# End of testinput2 # End of testinput2

View File

@ -71,7 +71,7 @@ No match: POSIX code 17: match failed
0: abc 0: abc
/(abc)\2/ /(abc)\2/
Failed: POSIX code 15: bad back reference at offset 7 Failed: POSIX code 15: bad back reference at offset 6
/(abc\1)/ /(abc\1)/
abc abc

37
testdata/testoutput2 vendored
View File

@ -227,7 +227,7 @@ Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b
Subject length lower bound = 1 Subject length lower bound = 1
/(ab\2)/ /(ab\2)/
Failed: error 115 at offset 6: reference to non-existent subpattern Failed: error 115 at offset 4: reference to non-existent subpattern
/{4,5}abc/ /{4,5}abc/
Failed: error 109 at offset 4: quantifier does not follow a repeatable item Failed: error 109 at offset 4: quantifier does not follow a repeatable item
@ -327,7 +327,7 @@ No match
No match No match
/(a)(b)(c)(d)(e)\6/ /(a)(b)(c)(d)(e)\6/
Failed: error 115 at offset 17: reference to non-existent subpattern Failed: error 115 at offset 16: reference to non-existent subpattern
/the quick brown fox/I /the quick brown fox/I
Capturing subpattern count = 0 Capturing subpattern count = 0
@ -901,13 +901,13 @@ Failed: error 109 at offset 2: quantifier does not follow a repeatable item
Failed: error 122 at offset 0: unmatched closing parenthesis Failed: error 122 at offset 0: unmatched closing parenthesis
/\1/ /\1/
Failed: error 115 at offset 2: reference to non-existent subpattern Failed: error 115 at offset 1: reference to non-existent subpattern
/\2/ /\2/
Failed: error 115 at offset 2: reference to non-existent subpattern Failed: error 115 at offset 1: reference to non-existent subpattern
/(a)|\2/ /(a)|\2/
Failed: error 115 at offset 6: reference to non-existent subpattern Failed: error 115 at offset 5: reference to non-existent subpattern
/a[b-a]/Ii /a[b-a]/Ii
Failed: error 108 at offset 4: range out of order in character class Failed: error 108 at offset 4: range out of order in character class
@ -8628,10 +8628,10 @@ Failed: error 162 at offset 3: subpattern name expected
Failed: error 162 at offset 3: subpattern name expected Failed: error 162 at offset 3: subpattern name expected
/\k/ /\k/
Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name Failed: error 169 at offset 1: \k is not followed by a braced, angle-bracketed, or quoted name
/\kabc/ /\kabc/
Failed: error 169 at offset 5: \k is not followed by a braced, angle-bracketed, or quoted name Failed: error 169 at offset 1: \k is not followed by a braced, angle-bracketed, or quoted name
/(?P=)/ /(?P=)/
Failed: error 162 at offset 4: subpattern name expected Failed: error 162 at offset 4: subpattern name expected
@ -8667,7 +8667,7 @@ Failed: error 130 at offset 6: unknown POSIX class name
Failed: error 130 at offset 3: unknown POSIX class name Failed: error 130 at offset 3: unknown POSIX class name
/(^(a|b\g<-1'c))/ /(^(a|b\g<-1'c))/
Failed: error 157 at offset 15: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
/^(?+1)(?<a>x|y){0}z/ /^(?+1)(?<a>x|y){0}z/
xzxx xzxx
@ -13320,10 +13320,10 @@ Failed: error 144 at offset 13: group name must start with a non-digit
Failed: error 144 at offset 14: group name must start with a non-digit Failed: error 144 at offset 14: group name must start with a non-digit
/\g'3gh'/ /\g'3gh'/
Failed: error 157 at offset 7: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
/\g<5fg>/ /\g<5fg>/
Failed: error 157 at offset 7: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
/(?(<4gh>)abc)/ /(?(<4gh>)abc)/
Failed: error 144 at offset 4: group name must start with a non-digit Failed: error 144 at offset 4: group name must start with a non-digit
@ -13455,10 +13455,10 @@ Failed: error 178 at offset 3: digits missing in \x{} or \o{}
Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?) Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?)
/A\8B/ /A\8B/
Failed: error 115 at offset 4: reference to non-existent subpattern Failed: error 115 at offset 2: reference to non-existent subpattern
/A\9B/ /A\9B/
Failed: error 115 at offset 4: reference to non-existent subpattern Failed: error 115 at offset 2: reference to non-existent subpattern
# This one is here because Perl fails to match "12" for this pattern when the $ # This one is here because Perl fails to match "12" for this pattern when the $
# is present. # is present.
@ -14313,7 +14313,7 @@ No match
0: 0:
/((((((((x))))))))\81/ /((((((((x))))))))\81/
Failed: error 115 at offset 20: reference to non-existent subpattern Failed: error 115 at offset 19: reference to non-existent subpattern
xx1 xx1
/((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/ /((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/
@ -14336,10 +14336,10 @@ Matched, but too many substrings
14: x 14: x
/\80/ /\80/
Failed: error 115 at offset 3: reference to non-existent subpattern Failed: error 115 at offset 2: reference to non-existent subpattern
/A\8B\9C/ /A\8B\9C/
Failed: error 115 at offset 7: reference to non-existent subpattern Failed: error 115 at offset 2: reference to non-existent subpattern
A8B9C A8B9C
/(?x:((?'a')) # comment (with parentheses) and | vertical /(?x:((?'a')) # comment (with parentheses) and | vertical
@ -14422,10 +14422,10 @@ Subject length lower bound = 1
------------------------------------------------------------------ ------------------------------------------------------------------
/(\9*+(?2);\3++()2|)++{/ /(\9*+(?2);\3++()2|)++{/
Failed: error 115 at offset 22: reference to non-existent subpattern Failed: error 115 at offset 2: reference to non-existent subpattern
/\V\x85\9*+((?2)\3++()2)*:2/ /\V\x85\9*+((?2)\3++()2)*:2/
Failed: error 115 at offset 26: reference to non-existent subpattern Failed: error 115 at offset 7: reference to non-existent subpattern
/(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames /(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames
@ -14648,4 +14648,7 @@ Failed: error 161 at offset 16: number is too big
01 01
No match No match
/(?(1)()\983040\2)/
Failed: error 115 at offset 13: reference to non-existent subpattern
# End of testinput2 # End of testinput2