From 766fbc81d0f596c9a16ccbbe8c67e5f72a476c83 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 25 Oct 2015 17:35:34 +0000 Subject: [PATCH] Fix PCRE2_NO_AUTO_CAPTURE bug. --- ChangeLog | 4 ++++ src/pcre2_compile.c | 25 +++++++++++++------------ testdata/testinput2 | 4 ++++ testdata/testoutput2 | 6 ++++++ 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 82144ba..188aaaa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -213,6 +213,10 @@ and AddressSanitizer. 61. Whitespace at the end of a pcre2test pattern line caused a spurious error message if there were only single-character modifiers. It should be ignored. +62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results +or segmentation errors for some patterns. Found with libFuzzer and +AddressSanitizer. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 0a77223..fcd6249 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1621,7 +1621,7 @@ pattern's options. There is one "trick" case: when a sequence such as [[:>:]] or \s in UCP mode is processed, it is replaced by a nested alternative sequence. If this contains a backslash (which is usually does), ptrend does not point to its end - it still -points to the end of the whole pattern. However, we can detect this case +points to the end of the whole pattern. However, we can detect this case because cb->nestptr[0] will be non-NULL. The nested sequences are all zero- terminated and there are only ever two levels of nesting. @@ -3187,9 +3187,10 @@ for (; ptr < cb->end_pattern; ptr++) if (ptr[1] != CHAR_QUESTION_MARK) { - if (ptr[1] != CHAR_ASTERISK && - (options & PCRE2_NO_AUTO_CAPTURE) == 0) - cb->bracount++; /* Capturing group */ + if (ptr[1] != CHAR_ASTERISK) + { + if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++; + } else /* (*something) - just skip to closing ket */ { ptr += 2; @@ -3717,7 +3718,7 @@ for (;; ptr++) if (c == CHAR_NULL && cb->nestptr[0] != NULL) { ptr = cb->nestptr[0]; - cb->nestptr[0] = cb->nestptr[1]; + cb->nestptr[0] = cb->nestptr[1]; cb->nestptr[1] = NULL; c = *ptr; } @@ -3846,7 +3847,7 @@ for (;; ptr++) /* Create auto callout, except for quantifiers, or while processing property strings that are substituted for \w etc in UCP mode. */ - if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier && + if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier && cb->nestptr[0] == NULL) { previous_callout = code; @@ -4140,8 +4141,8 @@ for (;; ptr++) int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); /* The posix_substitutes table specifies which POSIX classes can be - converted to \p or \P items. This can only happen at top nestling - level, as there will never be a POSIX class in a string that is + converted to \p or \P items. This can only happen at top nestling + level, as there will never be a POSIX class in a string that is substituted for something else. */ if (posix_substitutes[pc] != NULL) @@ -4282,7 +4283,7 @@ for (;; ptr++) case ESC_WU: /* or \P to test Unicode properties instead */ case ESC_su: /* of the default ASCII testing. This might be */ case ESC_SU: /* a 2nd-level nesting for [[:<:]] or [[:>:]]. */ - cb->nestptr[1] = cb->nestptr[0]; + cb->nestptr[1] = cb->nestptr[0]; cb->nestptr[0] = ptr; ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ class_has_8bitchar--; /* Undo! */ @@ -4628,7 +4629,7 @@ for (;; ptr++) if (c == CHAR_NULL && cb->nestptr[0] != NULL) { ptr = cb->nestptr[0]; - cb->nestptr[0] = cb->nestptr[1]; + cb->nestptr[0] = cb->nestptr[1]; cb->nestptr[1] = NULL; c = *(++ptr); } @@ -7072,7 +7073,7 @@ for (;; ptr++) #endif /* The use of \C can be locked out. */ - + #ifdef NEVER_BACKSLASH_C else if (escape == ESC_C) { @@ -7085,7 +7086,7 @@ for (;; ptr++) *errorcodeptr = ERR83; goto FAILED; } -#endif +#endif /* For the rest (including \X when Unicode properties are supported), we can obtain the OP value by negating the escape value in the default diff --git a/testdata/testinput2 b/testdata/testinput2 index 8c7a18b..4add971 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4583,4 +4583,8 @@ B)x/alt_verbnames,mark /^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I +/((p(?'K/ + +/((p(?'K/no_auto_capture + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index b01484d..312b358 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14668,4 +14668,10 @@ Overall options: anchored Last code unit = '}' Subject length lower bound = 65535 +/((p(?'K/ +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator) + +/((p(?'K/no_auto_capture +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator) + # End of testinput2