Fix hyphen after \E after POSIX class causing an error.

This commit is contained in:
Philip.Hazel 2017-01-11 16:40:35 +00:00
parent 27eab0b836
commit 1226e2e0d0
4 changed files with 47 additions and 22 deletions

View File

@ -125,6 +125,9 @@ fully released code, but are noted here for the record.
(r) If a character whose code point was greater than 0xffff appeared within (r) If a character whose code point was greater than 0xffff appeared within
a lookbehind that was within another lookbehind, the calculation of the a lookbehind that was within another lookbehind, the calculation of the
lookbehind length went wrong and could provoke an internal error. lookbehind length went wrong and could provoke an internal error.
(t) The sequence \E- or \Q\E- after a POSIX class in a character class caused
an internal error. Now the hyphen is treated as a literal.
4. Back references are now permitted in lookbehind assertions when there are 4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the no duplicated group numbers (that is, (?| has not been used), and, if the

View File

@ -3010,6 +3010,14 @@ while (ptr < ptrend)
goto FAILED; goto FAILED;
} }
/* Set "a hyphen is not the start of a range" just in case the POSIX
class is followed by \E or \Q\E (possibly repeated - fuzzers do that
kind of thing) and *then* a hyphen. This causes that hyphen to be
treated as a literal. I don't think it's worth setting up special
apparatus to do otherwise. */
class_range_state = RANGE_NO;
/* When PCRE2_UCP is set, some of the POSIX classes are converted to /* When PCRE2_UCP is set, some of the POSIX classes are converted to
use Unicode properties \p or \P or, in one case, \h or \H. The use Unicode properties \p or \P or, in one case, \h or \H. The
substitutes table has two values per class, containing the type and substitutes table has two values per class, containing the type and
@ -4224,10 +4232,10 @@ return 0;
/* This function packages up the logic of adding a character or range of /* This function packages up the logic of adding a character or range of
characters to a class. The character values in the arguments will be within the characters to a class. The character values in the arguments will be within the
valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
called only from within the "add to class" group of functions, some of which called only from within the "add to class" group of functions, some of which
are recursive and mutually recursive. The external entry point is are recursive and mutually recursive. The external entry point is
add_to_class(). add_to_class().
Arguments: Arguments:
classbits the bit map for characters < 256 classbits the bit map for characters < 256
@ -4242,7 +4250,7 @@ Returns: the number of < 256 characters added
*/ */
static unsigned int static unsigned int
add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
uint32_t options, compile_block *cb, uint32_t start, uint32_t end) uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
{ {
uint32_t c; uint32_t c;
@ -4307,7 +4315,7 @@ can be used in all cases. */
if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR) if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR)
end = MAX_NON_UTF_CHAR; end = MAX_NON_UTF_CHAR;
if (start > cb->class_range_start && end < cb->class_range_end) return n8; if (start > cb->class_range_start && end < cb->class_range_end) return n8;
/* Use the bitmap for characters < 256. Otherwise use extra data.*/ /* Use the bitmap for characters < 256. Otherwise use extra data.*/
@ -4380,8 +4388,8 @@ return n8; /* Number of 8-bit characters */
/* This function is used for adding a list of case-equivalent characters to a /* This function is used for adding a list of case-equivalent characters to a
class, and also for adding a list of horizontal or vertical whitespace. If the class, and also for adding a list of horizontal or vertical whitespace. If the
list is in order (which it should be), ranges of characters are detected and list is in order (which it should be), ranges of characters are detected and
handled appropriately. This function is called (sometimes recursively) only handled appropriately. This function is called (sometimes recursively) only
from within the "add to class" set of functions. The external entry point is from within the "add to class" set of functions. The external entry point is
add_list_to_class(). add_list_to_class().
Arguments: Arguments:
@ -4399,7 +4407,7 @@ Returns: the number of < 256 characters added
*/ */
static unsigned int static unsigned int
add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr, add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except) uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except)
{ {
unsigned int n8 = 0; unsigned int n8 = 0;
@ -4422,7 +4430,7 @@ return n8;
* External entry point for add range to class * * External entry point for add range to class *
*************************************************/ *************************************************/
/* This function sets the overall range so that the internal functions can try /* This function sets the overall range so that the internal functions can try
to avoid duplication when handling case-independence. to avoid duplication when handling case-independence.
Arguments: Arguments:
@ -4451,7 +4459,7 @@ return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
* External entry point for add list to class * * External entry point for add list to class *
*************************************************/ *************************************************/
/* This function sets the overall range so that the internal functions can try /* This function sets the overall range so that the internal functions can try
to avoid duplication when handling case-independence. to avoid duplication when handling case-independence.
Arguments: Arguments:
@ -4480,7 +4488,7 @@ while (p[0] < NOTACHAR)
{ {
while(p[n+1] == p[0] + n + 1) n++; while(p[n+1] == p[0] + n + 1) n++;
cb->class_range_start = p[0]; cb->class_range_start = p[0];
cb->class_range_end = p[n]; cb->class_range_end = p[n];
n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]); n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
} }
p += n + 1; p += n + 1;
@ -4736,7 +4744,7 @@ for (;; pptr++)
meta = META_CODE(*pptr); meta = META_CODE(*pptr);
meta_arg = META_DATA(*pptr); meta_arg = META_DATA(*pptr);
/* If we are in the pre-compile phase, accumulate the length used for the /* If we are in the pre-compile phase, accumulate the length used for the
previous cycle of this loop, unless the next item is a quantifier. */ previous cycle of this loop, unless the next item is a quantifier. */
@ -5148,30 +5156,30 @@ for (;; pptr++)
should_flip_negation = TRUE; should_flip_negation = TRUE;
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space]; for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
break; break;
/* When adding the horizontal or vertical space lists to a class, or /* When adding the horizontal or vertical space lists to a class, or
their complements, disable PCRE2_CASELESS, because it just wastes their complements, disable PCRE2_CASELESS, because it just wastes
time, and in the "not-x" UTF cases can create unwanted duplicates in time, and in the "not-x" UTF cases can create unwanted duplicates in
the XCLASS list (provoked by characters that have more than one other the XCLASS list (provoked by characters that have more than one other
case and by both cases being in the same "not-x" sublist). */ case and by both cases being in the same "not-x" sublist). */
case ESC_h: case ESC_h:
(void)add_list_to_class(classbits, &class_uchardata, (void)add_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR); options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR);
break; break;
case ESC_H: case ESC_H:
(void)add_not_list_to_class(classbits, &class_uchardata, (void)add_not_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list)); options & ~PCRE2_CASELESS, cb, PRIV(hspace_list));
break; break;
case ESC_v: case ESC_v:
(void)add_list_to_class(classbits, &class_uchardata, (void)add_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR); options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR);
break; break;
case ESC_V: case ESC_V:
(void)add_not_list_to_class(classbits, &class_uchardata, (void)add_not_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list)); options & ~PCRE2_CASELESS, cb, PRIV(vspace_list));
break; break;

6
testdata/testinput1 vendored
View File

@ -5820,4 +5820,10 @@ ef) x/x,mark
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
/[s[:digit:]\E-H]+/
s09-H
/[s[:digit:]\Q\E-H]+/
s09-H
# End of testinput1 # End of testinput1

View File

@ -9297,4 +9297,12 @@ No match
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/ /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
/[s[:digit:]\E-H]+/
s09-H
0: s09-H
/[s[:digit:]\Q\E-H]+/
s09-H
0: s09-H
# End of testinput1 # End of testinput1