Fix hyphen after \E after POSIX class causing an error.
This commit is contained in:
parent
27eab0b836
commit
1226e2e0d0
|
@ -125,6 +125,9 @@ fully released code, but are noted here for the record.
|
||||||
(r) If a character whose code point was greater than 0xffff appeared within
|
(r) If a character whose code point was greater than 0xffff appeared within
|
||||||
a lookbehind that was within another lookbehind, the calculation of the
|
a lookbehind that was within another lookbehind, the calculation of the
|
||||||
lookbehind length went wrong and could provoke an internal error.
|
lookbehind length went wrong and could provoke an internal error.
|
||||||
|
|
||||||
|
(t) The sequence \E- or \Q\E- after a POSIX class in a character class caused
|
||||||
|
an internal error. Now the hyphen is treated as a literal.
|
||||||
|
|
||||||
4. Back references are now permitted in lookbehind assertions when there are
|
4. Back references are now permitted in lookbehind assertions when there are
|
||||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
|
|
|
@ -3010,6 +3010,14 @@ while (ptr < ptrend)
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set "a hyphen is not the start of a range" just in case the POSIX
|
||||||
|
class is followed by \E or \Q\E (possibly repeated - fuzzers do that
|
||||||
|
kind of thing) and *then* a hyphen. This causes that hyphen to be
|
||||||
|
treated as a literal. I don't think it's worth setting up special
|
||||||
|
apparatus to do otherwise. */
|
||||||
|
|
||||||
|
class_range_state = RANGE_NO;
|
||||||
|
|
||||||
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
|
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
|
||||||
use Unicode properties \p or \P or, in one case, \h or \H. The
|
use Unicode properties \p or \P or, in one case, \h or \H. The
|
||||||
substitutes table has two values per class, containing the type and
|
substitutes table has two values per class, containing the type and
|
||||||
|
@ -4224,10 +4232,10 @@ return 0;
|
||||||
|
|
||||||
/* This function packages up the logic of adding a character or range of
|
/* This function packages up the logic of adding a character or range of
|
||||||
characters to a class. The character values in the arguments will be within the
|
characters to a class. The character values in the arguments will be within the
|
||||||
valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
|
valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
|
||||||
called only from within the "add to class" group of functions, some of which
|
called only from within the "add to class" group of functions, some of which
|
||||||
are recursive and mutually recursive. The external entry point is
|
are recursive and mutually recursive. The external entry point is
|
||||||
add_to_class().
|
add_to_class().
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
classbits the bit map for characters < 256
|
classbits the bit map for characters < 256
|
||||||
|
@ -4242,7 +4250,7 @@ Returns: the number of < 256 characters added
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static unsigned int
|
static unsigned int
|
||||||
add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
|
add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
|
||||||
uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
|
uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
|
||||||
{
|
{
|
||||||
uint32_t c;
|
uint32_t c;
|
||||||
|
@ -4307,7 +4315,7 @@ can be used in all cases. */
|
||||||
|
|
||||||
if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR)
|
if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR)
|
||||||
end = MAX_NON_UTF_CHAR;
|
end = MAX_NON_UTF_CHAR;
|
||||||
|
|
||||||
if (start > cb->class_range_start && end < cb->class_range_end) return n8;
|
if (start > cb->class_range_start && end < cb->class_range_end) return n8;
|
||||||
|
|
||||||
/* Use the bitmap for characters < 256. Otherwise use extra data.*/
|
/* Use the bitmap for characters < 256. Otherwise use extra data.*/
|
||||||
|
@ -4380,8 +4388,8 @@ return n8; /* Number of 8-bit characters */
|
||||||
/* This function is used for adding a list of case-equivalent characters to a
|
/* This function is used for adding a list of case-equivalent characters to a
|
||||||
class, and also for adding a list of horizontal or vertical whitespace. If the
|
class, and also for adding a list of horizontal or vertical whitespace. If the
|
||||||
list is in order (which it should be), ranges of characters are detected and
|
list is in order (which it should be), ranges of characters are detected and
|
||||||
handled appropriately. This function is called (sometimes recursively) only
|
handled appropriately. This function is called (sometimes recursively) only
|
||||||
from within the "add to class" set of functions. The external entry point is
|
from within the "add to class" set of functions. The external entry point is
|
||||||
add_list_to_class().
|
add_list_to_class().
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -4399,7 +4407,7 @@ Returns: the number of < 256 characters added
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static unsigned int
|
static unsigned int
|
||||||
add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
|
add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
|
||||||
uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except)
|
uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except)
|
||||||
{
|
{
|
||||||
unsigned int n8 = 0;
|
unsigned int n8 = 0;
|
||||||
|
@ -4422,7 +4430,7 @@ return n8;
|
||||||
* External entry point for add range to class *
|
* External entry point for add range to class *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function sets the overall range so that the internal functions can try
|
/* This function sets the overall range so that the internal functions can try
|
||||||
to avoid duplication when handling case-independence.
|
to avoid duplication when handling case-independence.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -4451,7 +4459,7 @@ return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
|
||||||
* External entry point for add list to class *
|
* External entry point for add list to class *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function sets the overall range so that the internal functions can try
|
/* This function sets the overall range so that the internal functions can try
|
||||||
to avoid duplication when handling case-independence.
|
to avoid duplication when handling case-independence.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -4480,7 +4488,7 @@ while (p[0] < NOTACHAR)
|
||||||
{
|
{
|
||||||
while(p[n+1] == p[0] + n + 1) n++;
|
while(p[n+1] == p[0] + n + 1) n++;
|
||||||
cb->class_range_start = p[0];
|
cb->class_range_start = p[0];
|
||||||
cb->class_range_end = p[n];
|
cb->class_range_end = p[n];
|
||||||
n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
|
n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
|
||||||
}
|
}
|
||||||
p += n + 1;
|
p += n + 1;
|
||||||
|
@ -4736,7 +4744,7 @@ for (;; pptr++)
|
||||||
|
|
||||||
meta = META_CODE(*pptr);
|
meta = META_CODE(*pptr);
|
||||||
meta_arg = META_DATA(*pptr);
|
meta_arg = META_DATA(*pptr);
|
||||||
|
|
||||||
/* If we are in the pre-compile phase, accumulate the length used for the
|
/* If we are in the pre-compile phase, accumulate the length used for the
|
||||||
previous cycle of this loop, unless the next item is a quantifier. */
|
previous cycle of this loop, unless the next item is a quantifier. */
|
||||||
|
|
||||||
|
@ -5148,30 +5156,30 @@ for (;; pptr++)
|
||||||
should_flip_negation = TRUE;
|
should_flip_negation = TRUE;
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
|
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* When adding the horizontal or vertical space lists to a class, or
|
/* When adding the horizontal or vertical space lists to a class, or
|
||||||
their complements, disable PCRE2_CASELESS, because it just wastes
|
their complements, disable PCRE2_CASELESS, because it just wastes
|
||||||
time, and in the "not-x" UTF cases can create unwanted duplicates in
|
time, and in the "not-x" UTF cases can create unwanted duplicates in
|
||||||
the XCLASS list (provoked by characters that have more than one other
|
the XCLASS list (provoked by characters that have more than one other
|
||||||
case and by both cases being in the same "not-x" sublist). */
|
case and by both cases being in the same "not-x" sublist). */
|
||||||
|
|
||||||
case ESC_h:
|
case ESC_h:
|
||||||
(void)add_list_to_class(classbits, &class_uchardata,
|
(void)add_list_to_class(classbits, &class_uchardata,
|
||||||
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR);
|
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESC_H:
|
case ESC_H:
|
||||||
(void)add_not_list_to_class(classbits, &class_uchardata,
|
(void)add_not_list_to_class(classbits, &class_uchardata,
|
||||||
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list));
|
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESC_v:
|
case ESC_v:
|
||||||
(void)add_list_to_class(classbits, &class_uchardata,
|
(void)add_list_to_class(classbits, &class_uchardata,
|
||||||
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR);
|
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESC_V:
|
case ESC_V:
|
||||||
(void)add_not_list_to_class(classbits, &class_uchardata,
|
(void)add_not_list_to_class(classbits, &class_uchardata,
|
||||||
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list));
|
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -5820,4 +5820,10 @@ ef) x/x,mark
|
||||||
|
|
||||||
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
|
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
|
||||||
|
|
||||||
|
/[s[:digit:]\E-H]+/
|
||||||
|
s09-H
|
||||||
|
|
||||||
|
/[s[:digit:]\Q\E-H]+/
|
||||||
|
s09-H
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -9297,4 +9297,12 @@ No match
|
||||||
|
|
||||||
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
|
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
|
||||||
|
|
||||||
|
/[s[:digit:]\E-H]+/
|
||||||
|
s09-H
|
||||||
|
0: s09-H
|
||||||
|
|
||||||
|
/[s[:digit:]\Q\E-H]+/
|
||||||
|
s09-H
|
||||||
|
0: s09-H
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
Loading…
Reference in New Issue