Fix empty comment (?#) bug

This commit is contained in:
Philip.Hazel 2015-07-17 16:25:21 +00:00
parent d702527628
commit 21b15d96f2
4 changed files with 37 additions and 13 deletions

View File

@ -46,6 +46,10 @@ for example, /(?(R))*+/, was incorrectly compiled.
12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian
Persch).
13. An empty comment (?#) in a pattern was incorrectly processed and could
provoke a buffer overflow. This bug was discovered by Karl Skomski with the
LLVM fuzzer.
Version 10.20 30-June-2015
--------------------------

View File

@ -2997,6 +2997,7 @@ int namelen;
int i; int i;
BOOL inescq = FALSE; BOOL inescq = FALSE;
BOOL isdupname; BOOL isdupname;
BOOL skiptoket = FALSE;
BOOL utf = (options & PCRE2_UTF) != 0; BOOL utf = (options & PCRE2_UTF) != 0;
BOOL negate_class; BOOL negate_class;
PCRE2_SPTR name; PCRE2_SPTR name;
@ -3009,6 +3010,16 @@ nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
for (; ptr < cb->end_pattern; ptr++) for (; ptr < cb->end_pattern; ptr++)
{ {
c = *ptr; c = *ptr;
/* Parenthesized groups set skiptoket when all following characters up to the
next closing parenthesis must be ignored. The parenthesis itself must be
processed (to end the nested parenthesized item). */
if (skiptoket)
{
if (c != CHAR_RIGHT_PARENTHESIS) continue;
skiptoket = FALSE;
}
/* Skip over literals */ /* Skip over literals */
@ -3177,9 +3188,14 @@ for (; ptr < cb->end_pattern; ptr++)
{ {
default: default:
ptr += 2; ptr += 2;
if (ptr[0] == CHAR_R || /* (?R) */ if (ptr[0] == CHAR_R || /* (?R) */
IS_DIGIT(ptr[0]) || /* (?n) */ ptr[0] == CHAR_NUMBER_SIGN || /* (?#) */
(ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) break; /* (?-n) */ IS_DIGIT(ptr[0]) || /* (?n) */
(ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) /* (?-n) */
{
skiptoket = TRUE;
break;
}
/* Handle (?| and (?imsxJU: which are the only other valid forms. Both /* Handle (?| and (?imsxJU: which are the only other valid forms. Both
need a new block on the nest stack. */ need a new block on the nest stack. */
@ -3304,16 +3320,6 @@ for (; ptr < cb->end_pattern; ptr++)
while (ptr[0] != delimiter); while (ptr[0] != delimiter);
break; break;
case CHAR_NUMBER_SIGN:
ptr += 3;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR18;
goto FAILED;
}
break;
case CHAR_LEFT_PARENTHESIS: case CHAR_LEFT_PARENTHESIS:
nest_depth++; nest_depth++;
/* Fall through */ /* Fall through */

4
testdata/testinput2 vendored
View File

@ -4342,4 +4342,8 @@ a random value. /Ix
/(?(R))*+/B /(?(R))*+/B
abcd abcd
/((?x)(?#))#(?'/
/((?x)(?#))#(?'abc')/I
# End of testinput2 # End of testinput2

10
testdata/testoutput2 vendored
View File

@ -14514,4 +14514,14 @@ Failed: error 124 at offset 10: unrecognized character after (?<
abcd abcd
0: 0:
/((?x)(?#))#(?'/
Failed: error 124 at offset 14: unrecognized character after (?<
/((?x)(?#))#(?'abc')/I
Capturing subpattern count = 2
Named capturing subpatterns:
abc 2
First code unit = '#'
Subject length lower bound = 1
# End of testinput2 # End of testinput2