Fix another fuzzer bug.
This commit is contained in:
parent
f957e7bfa8
commit
31241914a5
|
@ -53,6 +53,12 @@ LLVM fuzzer.
|
||||||
14. Fix infinite recursion in the JIT compiler when certain patterns such as
|
14. Fix infinite recursion in the JIT compiler when certain patterns such as
|
||||||
/(?:|a|){100}x/ are analysed.
|
/(?:|a|){100}x/ are analysed.
|
||||||
|
|
||||||
|
15. Some patterns with character classes involving [: and \\ were incorrectly
|
||||||
|
compiled and could cause reading from uninitialized memory or an incorrect
|
||||||
|
error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]/. The
|
||||||
|
first of these bugs was discovered by Karl Skomski with the LLVM fuzzer.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -2574,11 +2574,11 @@ didn't consider this to be a POSIX class. Likewise for [:1234:].
|
||||||
The problem in trying to be exactly like Perl is in the handling of escapes. We
|
The problem in trying to be exactly like Perl is in the handling of escapes. We
|
||||||
have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
|
have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
|
||||||
class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
|
class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
|
||||||
below handles the special case of \], but does not try to do any other escape
|
below handles the special cases \\ and \], but does not try to do any other
|
||||||
processing. This makes it different from Perl for cases such as [:l\ower:]
|
escape processing. This makes it different from Perl for cases such as
|
||||||
where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
|
[:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does
|
||||||
"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
|
not recognize "l\ower". This is a lesser evil than not diagnosing bad classes
|
||||||
I think.
|
when Perl does, I think.
|
||||||
|
|
||||||
A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
|
A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
|
||||||
It seems that the appearance of a nested POSIX class supersedes an apparent
|
It seems that the appearance of a nested POSIX class supersedes an apparent
|
||||||
|
@ -2606,7 +2606,9 @@ terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
|
||||||
|
|
||||||
for (++ptr; *ptr != CHAR_NULL; ptr++)
|
for (++ptr; *ptr != CHAR_NULL; ptr++)
|
||||||
{
|
{
|
||||||
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++;
|
if (*ptr == CHAR_BACKSLASH &&
|
||||||
|
(ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH))
|
||||||
|
ptr++;
|
||||||
else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
|
else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -3010,16 +3012,16 @@ nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
|
||||||
for (; ptr < cb->end_pattern; ptr++)
|
for (; ptr < cb->end_pattern; ptr++)
|
||||||
{
|
{
|
||||||
c = *ptr;
|
c = *ptr;
|
||||||
|
|
||||||
/* Parenthesized groups set skiptoket when all following characters up to the
|
/* Parenthesized groups set skiptoket when all following characters up to the
|
||||||
next closing parenthesis must be ignored. The parenthesis itself must be
|
next closing parenthesis must be ignored. The parenthesis itself must be
|
||||||
processed (to end the nested parenthesized item). */
|
processed (to end the nested parenthesized item). */
|
||||||
|
|
||||||
if (skiptoket)
|
if (skiptoket)
|
||||||
{
|
{
|
||||||
if (c != CHAR_RIGHT_PARENTHESIS) continue;
|
if (c != CHAR_RIGHT_PARENTHESIS) continue;
|
||||||
skiptoket = FALSE;
|
skiptoket = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip over literals */
|
/* Skip over literals */
|
||||||
|
|
||||||
|
@ -3117,6 +3119,8 @@ for (; ptr < cb->end_pattern; ptr++)
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
PCRE2_SPTR tempptr;
|
||||||
|
|
||||||
if (c == CHAR_NULL && ptr >= cb->end_pattern)
|
if (c == CHAR_NULL && ptr >= cb->end_pattern)
|
||||||
{
|
{
|
||||||
errorcode = ERR6; /* Missing terminating ']' */
|
errorcode = ERR6; /* Missing terminating ']' */
|
||||||
|
@ -3143,12 +3147,11 @@ for (; ptr < cb->end_pattern; ptr++)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip POSIX class names. */
|
/* Skip POSIX class names. */
|
||||||
|
|
||||||
if (c == CHAR_LEFT_SQUARE_BRACKET &&
|
if (c == CHAR_LEFT_SQUARE_BRACKET &&
|
||||||
(ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
|
(ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
|
||||||
ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &ptr))
|
ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
|
||||||
{
|
{
|
||||||
ptr++;
|
ptr = tempptr + 1;
|
||||||
}
|
}
|
||||||
else if (c == CHAR_BACKSLASH)
|
else if (c == CHAR_BACKSLASH)
|
||||||
{
|
{
|
||||||
|
@ -3189,13 +3192,13 @@ for (; ptr < cb->end_pattern; ptr++)
|
||||||
default:
|
default:
|
||||||
ptr += 2;
|
ptr += 2;
|
||||||
if (ptr[0] == CHAR_R || /* (?R) */
|
if (ptr[0] == CHAR_R || /* (?R) */
|
||||||
ptr[0] == CHAR_NUMBER_SIGN || /* (?#) */
|
ptr[0] == CHAR_NUMBER_SIGN || /* (?#) */
|
||||||
IS_DIGIT(ptr[0]) || /* (?n) */
|
IS_DIGIT(ptr[0]) || /* (?n) */
|
||||||
(ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) /* (?-n) */
|
(ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) /* (?-n) */
|
||||||
{
|
{
|
||||||
skiptoket = TRUE;
|
skiptoket = TRUE;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle (?| and (?imsxJU: which are the only other valid forms. Both
|
/* Handle (?| and (?imsxJU: which are the only other valid forms. Both
|
||||||
need a new block on the nest stack. */
|
need a new block on the nest stack. */
|
||||||
|
|
|
@ -4346,4 +4346,8 @@ a random value. /Ix
|
||||||
|
|
||||||
/((?x)(?#))#(?'abc')/I
|
/((?x)(?#))#(?'abc')/I
|
||||||
|
|
||||||
|
/[[:\\](?<[::]/
|
||||||
|
|
||||||
|
/[[:\\](?'abc')[a:]/I
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -14524,4 +14524,14 @@ Named capturing subpatterns:
|
||||||
First code unit = '#'
|
First code unit = '#'
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/[[:\\](?<[::]/
|
||||||
|
Failed: error 124 at offset 9: unrecognized character after (?<
|
||||||
|
|
||||||
|
/[[:\\](?'abc')[a:]/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Named capturing subpatterns:
|
||||||
|
abc 1
|
||||||
|
Starting code units: : [ \
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue