Fix bugs for patterns with conditional groups starting with assertions.

This commit is contained in:
Philip.Hazel 2015-07-22 14:34:31 +00:00
parent e4d630c4af
commit 83c7c24b92
6 changed files with 59 additions and 11 deletions

View File

@ -72,6 +72,11 @@ branch and must therefore be treated as potentially matching an empty string.
19. If (?R was followed by - or + incorrect behaviour happened instead of a
diagnostic. This bug was discovered by Karl Skomski with the LLVM fuzzer.
20. Another bug that was introduced by change 36 for 10.20: conditional groups
whose condition was an assertion preceded by an explicit callout with a string
argument might be incorrectly processed, especially if the string contained \Q.
This bug was discovered by Karl Skomski with the LLVM fuzzer.
Version 10.20 30-June-2015
--------------------------

View File

@ -3324,10 +3324,39 @@ for (; ptr < cb->end_pattern; ptr++)
} }
break; break;
/* Conditional group */
case CHAR_LEFT_PARENTHESIS: case CHAR_LEFT_PARENTHESIS:
if (ptr[3] != CHAR_QUESTION_MARK) /* Not assertion or callout */
{
nest_depth++; nest_depth++;
ptr += 2;
break;
}
/* Must be an assertion or a callout */
switch(ptr[4])
{
case CHAR_LESS_THAN_SIGN:
if (ptr[5] != CHAR_EXCLAMATION_MARK && ptr[5] != CHAR_EQUALS_SIGN)
goto MISSING_ASSERTION;
/* Fall through */ /* Fall through */
case CHAR_C:
case CHAR_EXCLAMATION_MARK:
case CHAR_EQUALS_SIGN:
ptr++;
break;
default:
MISSING_ASSERTION:
ptr += 3; /* To improve error message */
errorcode = ERR28;
goto FAILED;
}
break;
case CHAR_COLON: case CHAR_COLON:
case CHAR_GREATER_THAN_SIGN: case CHAR_GREATER_THAN_SIGN:
case CHAR_EQUALS_SIGN: case CHAR_EQUALS_SIGN:

View File

@ -92,7 +92,7 @@ static const char compile_error_texts[] =
"failed to allocate heap memory\0" "failed to allocate heap memory\0"
"unmatched closing parenthesis\0" "unmatched closing parenthesis\0"
"internal error: code overflow\0" "internal error: code overflow\0"
"unrecognized character after (?<\0" "letter or underscore expected after (?< or (?'\0"
/* 25 */ /* 25 */
"lookbehind assertion is not fixed length\0" "lookbehind assertion is not fixed length\0"
"malformed number or name after (?(\0" "malformed number or name after (?(\0"

4
testdata/testinput2 vendored
View File

@ -4356,4 +4356,8 @@ a random value. /Ix
/(?R-:(?</ /(?R-:(?</
/(?(?C{\Q})(?!(?'/
/(?(?C{\Q})(?!(?'abc')))/I
# End of testinput2 # End of testinput2

24
testdata/testoutput2 vendored
View File

@ -940,7 +940,7 @@ Failed: error 122 at offset 0: unmatched closing parenthesis
Failed: error 114 at offset 4: missing closing parenthesis Failed: error 114 at offset 4: missing closing parenthesis
/(?<%)b/ /(?<%)b/
Failed: error 124 at offset 3: unrecognized character after (?< Failed: error 124 at offset 3: letter or underscore expected after (?< or (?'
/a(?{)b/ /a(?{)b/
Failed: error 111 at offset 3: unrecognized character after (? or (?- Failed: error 111 at offset 3: unrecognized character after (? or (?-
@ -14488,16 +14488,16 @@ Failed: error 161 at offset 32: number is too big
------------------------------------------------------------------ ------------------------------------------------------------------
/[[:>:]](?<)/ /[[:>:]](?<)/
Failed: error 124 at offset 10: unrecognized character after (?< Failed: error 124 at offset 10: letter or underscore expected after (?< or (?'
/((?x)(*:0))#(?'/ /((?x)(*:0))#(?'/
Failed: error 124 at offset 15: unrecognized character after (?< Failed: error 124 at offset 15: letter or underscore expected after (?< or (?'
/(?C$[$)(?<]/ /(?C$[$)(?<]/
Failed: error 124 at offset 10: unrecognized character after (?< Failed: error 124 at offset 10: letter or underscore expected after (?< or (?'
/(?C$)$)(?<]/ /(?C$)$)(?<]/
Failed: error 124 at offset 10: unrecognized character after (?< Failed: error 124 at offset 10: letter or underscore expected after (?< or (?'
/(?(R))*+/B /(?(R))*+/B
------------------------------------------------------------------ ------------------------------------------------------------------
@ -14515,7 +14515,7 @@ Failed: error 124 at offset 10: unrecognized character after (?<
0: 0:
/((?x)(?#))#(?'/ /((?x)(?#))#(?'/
Failed: error 124 at offset 14: unrecognized character after (?< Failed: error 124 at offset 14: letter or underscore expected after (?< or (?'
/((?x)(?#))#(?'abc')/I /((?x)(?#))#(?'abc')/I
Capturing subpattern count = 2 Capturing subpattern count = 2
@ -14525,7 +14525,7 @@ First code unit = '#'
Subject length lower bound = 1 Subject length lower bound = 1
/[[:\\](?<[::]/ /[[:\\](?<[::]/
Failed: error 124 at offset 9: unrecognized character after (?< Failed: error 124 at offset 9: letter or underscore expected after (?< or (?'
/[[:\\](?'abc')[a:]/I /[[:\\](?'abc')[a:]/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -14556,4 +14556,14 @@ Failed: error 106 at offset 353: missing terminating ] for character class
/(?R-:(?</ /(?R-:(?</
Failed: error 129 at offset 3: (?R or (?[+-]digits must be followed by ) Failed: error 129 at offset 3: (?R or (?[+-]digits must be followed by )
/(?(?C{\Q})(?!(?'/
Failed: error 124 at offset 16: letter or underscore expected after (?< or (?'
/(?(?C{\Q})(?!(?'abc')))/I
Capturing subpattern count = 1
Named capturing subpatterns:
abc 1
May match empty string
Subject length lower bound = 0
# End of testinput2 # End of testinput2

View File

@ -4051,6 +4051,6 @@ Failed: error 122 at offset 1227: unmatched closing parenthesis
/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
Failed: error 139 at offset 113: closing parenthesis for (?C expected Failed: error 124 at offset 113: letter or underscore expected after (?< or (?'
# End of testinput5 # End of testinput5