From 01c4647b029f2bd661ce86bcc69c2a475230bc08 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 21 Jul 2015 13:42:14 +0000 Subject: [PATCH] Fix "running for ever" bug for deeply nested [: sequences. --- ChangeLog | 4 ++++ src/pcre2_compile.c | 22 +++++++++------------- testdata/testinput2 | 2 ++ testdata/testoutput2 | 3 +++ 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 57dc1d0..6279995 100644 --- a/ChangeLog +++ b/ChangeLog @@ -58,6 +58,10 @@ compiled and could cause reading from uninitialized memory or an incorrect error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]. The first of these bugs was discovered by Karl Skomski with the LLVM fuzzer. +16. Pathological patterns containing many nested occurrences of [: caused +pcre2_compile() to run for a very long time. This bug was found by the LLVM +fuzzer. + Version 10.20 30-June-2015 diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 6e18a75..b9f7e7f 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2583,7 +2583,9 @@ when Perl does, I think. A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. It seems that the appearance of a nested POSIX class supersedes an apparent external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or -a digit. +a digit. This is handled by returning FALSE if the start of a new group with +the same terminator is encountered, since the next closing sequence must close +the nested group, not the outer one. In Perl, unescaped square brackets may also appear as part of class names. For example, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for @@ -2609,21 +2611,15 @@ for (++ptr; *ptr != CHAR_NULL; ptr++) if (*ptr == CHAR_BACKSLASH && (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH)) ptr++; - else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; - else + else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) || + *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; + else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) { - if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) - { - *endptr = ptr; - return TRUE; - } - if (*ptr == CHAR_LEFT_SQUARE_BRACKET && - (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || - ptr[1] == CHAR_EQUALS_SIGN) && - check_posix_syntax(ptr, endptr)) - return FALSE; + *endptr = ptr; + return TRUE; } } + return FALSE; } diff --git a/testdata/testinput2 b/testdata/testinput2 index e7c4825..5af7aa2 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4350,4 +4350,6 @@ a random value. /Ix /[[:\\](?'abc')[a:]/I +"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index f3e4ecf..04405cf 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14534,4 +14534,7 @@ Named capturing subpatterns: Starting code units: : [ \ Subject length lower bound = 2 +"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" +Failed: error 106 at offset 353: missing terminating ] for character class + # End of testinput2