Fix negated POSIX class bug.
This commit is contained in:
parent
aec5c96cf5
commit
291ececa58
|
@ -337,6 +337,9 @@ misbehaved. This bug was found by the LLVM fuzzer.
|
||||||
100. The error for an invalid UTF pattern string always gave the code unit
|
100. The error for an invalid UTF pattern string always gave the code unit
|
||||||
offset as zero instead of where the invalidity was found.
|
offset as zero instead of where the invalidity was found.
|
||||||
|
|
||||||
|
101. Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not
|
||||||
|
working correctly in UCP mode.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -3857,7 +3857,7 @@ for (;; ptr++)
|
||||||
{
|
{
|
||||||
BOOL negate_class;
|
BOOL negate_class;
|
||||||
BOOL should_flip_negation;
|
BOOL should_flip_negation;
|
||||||
BOOL match_all_wide_chars;
|
BOOL match_all_or_no_wide_chars;
|
||||||
BOOL possessive_quantifier;
|
BOOL possessive_quantifier;
|
||||||
BOOL is_quantifier;
|
BOOL is_quantifier;
|
||||||
BOOL is_recurse;
|
BOOL is_recurse;
|
||||||
|
@ -4207,9 +4207,10 @@ for (;; ptr++)
|
||||||
/* If a non-extended class contains a negative special such as \S, we need
|
/* If a non-extended class contains a negative special such as \S, we need
|
||||||
to flip the negation flag at the end, so that support for characters > 255
|
to flip the negation flag at the end, so that support for characters > 255
|
||||||
works correctly (they are all included in the class). An extended class may
|
works correctly (they are all included in the class). An extended class may
|
||||||
need to insert specific matching code for wide characters. */
|
need to insert specific matching or non-matching code for wide characters.
|
||||||
|
*/
|
||||||
|
|
||||||
should_flip_negation = match_all_wide_chars = FALSE;
|
should_flip_negation = match_all_or_no_wide_chars = FALSE;
|
||||||
|
|
||||||
/* Extended class (xclass) will be used when characters > 255
|
/* Extended class (xclass) will be used when characters > 255
|
||||||
might match. */
|
might match. */
|
||||||
|
@ -4365,21 +4366,20 @@ for (;; ptr++)
|
||||||
|
|
||||||
/* For the other POSIX classes (ascii, xdigit) we are going to fall
|
/* For the other POSIX classes (ascii, xdigit) we are going to fall
|
||||||
through to the non-UCP case and build a bit map for characters with
|
through to the non-UCP case and build a bit map for characters with
|
||||||
code points less than 256. If we are in a negated POSIX class
|
code points less than 256. However, if we are in a negated POSIX
|
||||||
within a non-negated overall class, characters with code points
|
class, characters with code points greater than 255 must either all
|
||||||
greater than 255 must all match. In the special case where we have
|
match or all not match, depending on whether the whole class is not
|
||||||
not yet generated any xclass data, and this is the final item in
|
or is negated. For example, for [[:^ascii:]... they must all match,
|
||||||
the overall class, we need do nothing: later on, the opcode
|
whereas for [^[:^xdigit:]... they must not.
|
||||||
OP_NCLASS will be used to indicate that characters greater than 255
|
|
||||||
are acceptable. If we have already seen an xclass item or one may
|
In the special case where there are no xclass items, this is
|
||||||
follow (we have to assume that it might if this is not the end of
|
automatically handled by the use of OP_CLASS or OP_NCLASS, but an
|
||||||
the class), set a flag to cause the generation of an explicit range
|
explicit range is needed for OP_XCLASS. Setting a flag here causes
|
||||||
for all wide codepoints. */
|
the range to be generated later when it is known that OP_XCLASS is
|
||||||
|
required. */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (!negate_class && local_negate &&
|
match_all_or_no_wide_chars |= local_negate;
|
||||||
(xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
|
|
||||||
match_all_wide_chars = TRUE;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4878,13 +4878,14 @@ for (;; ptr++)
|
||||||
(\p or \P), we have to compile an extended class, with its own opcode,
|
(\p or \P), we have to compile an extended class, with its own opcode,
|
||||||
unless there were no property settings and there was a negated special such
|
unless there were no property settings and there was a negated special such
|
||||||
as \S in the class, and PCRE2_UCP is not set, because in that case all
|
as \S in the class, and PCRE2_UCP is not set, because in that case all
|
||||||
characters > 255 are in the class, so any that were explicitly given as
|
characters > 255 are in or not in the class, so any that were explicitly
|
||||||
well can be ignored.
|
given as well can be ignored.
|
||||||
|
|
||||||
In the UCP case, if certain negated POSIX classes ([:^ascii:] or
|
In the UCP case, if certain negated POSIX classes ([:^ascii:] or
|
||||||
{^:xdigit:]) were present in a non-negative class, we again have to match
|
[:^xdigit:]) were present in a class, we either have to match or not match
|
||||||
all wide characters, indicated by match_all_wide_chars being true. We do
|
all wide characters (depending on whether the whole class is or is not
|
||||||
this by including an explicit range.
|
negated). This requirement is indicated by match_all_or_no_wide_chars being
|
||||||
|
true. We do this by including an explicit range, which works in both cases.
|
||||||
|
|
||||||
If, when generating an xclass, there are no characters < 256, we can omit
|
If, when generating an xclass, there are no characters < 256, we can omit
|
||||||
the bitmap in the actual compiled code. */
|
the bitmap in the actual compiled code. */
|
||||||
|
@ -4897,12 +4898,11 @@ for (;; ptr++)
|
||||||
if (xclass && (xclass_has_prop || !should_flip_negation))
|
if (xclass && (xclass_has_prop || !should_flip_negation))
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
if (match_all_wide_chars)
|
if (match_all_or_no_wide_chars)
|
||||||
{
|
{
|
||||||
*class_uchardata++ = XCL_RANGE;
|
*class_uchardata++ = XCL_RANGE;
|
||||||
class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
|
class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
|
||||||
class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT,
|
class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata);
|
||||||
class_uchardata);
|
|
||||||
}
|
}
|
||||||
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
|
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
|
||||||
*code++ = OP_XCLASS;
|
*code++ = OP_XCLASS;
|
||||||
|
|
Loading…
Reference in New Issue