Make pcre2test give an error for \P, \p, and \X after #forbid_utf.
This commit is contained in:
parent
13576ba188
commit
9ce0207f5c
|
@ -134,6 +134,9 @@ provoke a buffer overflow. This bug was discovered by the LLVM fuzzer.
|
|||
|
||||
34. Give an error for an empty subpattern name such as (?'').
|
||||
|
||||
35. Make pcre2test give an error if a pattern that follows #forbid_utf contains
|
||||
\P, \p, or \X.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "22 April 2015" "PCRE 10.20"
|
||||
.TH PCRE2TEST 1 "20 May 2015" "PCRE 10.20"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -237,13 +237,19 @@ following commands are recognized:
|
|||
#forbid_utf
|
||||
.sp
|
||||
Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
|
||||
options set, which locks out the use of UTF and Unicode property features. This
|
||||
is a trigger guard that is used in test files to ensure that UTF or Unicode
|
||||
property tests are not accidentally added to files that are used when Unicode
|
||||
support is not included in the library. This effect can also be obtained by the
|
||||
use of \fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be
|
||||
unset, and the automatic options are not displayed in pattern information, to
|
||||
avoid cluttering up test output.
|
||||
options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and
|
||||
the use of (*UTF) and (*UCP) at the start of patterns. This command also forces
|
||||
an error if a subsequent pattern contains any occurrences of \eP, \ep, or \eX,
|
||||
which are still supported when PCRE2_UTF is not set, but which require Unicode
|
||||
property support to be included in the library.
|
||||
.P
|
||||
This is a trigger guard that is used in test files to ensure that UTF or
|
||||
Unicode property tests are not accidentally added to files that are used when
|
||||
Unicode support is not included in the library. Setting PCRE2_NEVER_UTF and
|
||||
PCRE2_NEVER_UCP as a default can also be achieved by the use of \fB#pattern\fP;
|
||||
the difference is that \fB#forbid_utf\fP cannot be unset, and the automatic
|
||||
options are not displayed in pattern information, to avoid cluttering up test
|
||||
output.
|
||||
.sp
|
||||
#load <filename>
|
||||
.sp
|
||||
|
@ -1445,6 +1451,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 22 April 2015
|
||||
Last updated: 20 May 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1729,8 +1729,12 @@ else if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */
|
|||
|
||||
else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
||||
{
|
||||
if (i > 0) c = (uint32_t)i; /* Positive is a data character */
|
||||
else escape = -i; /* Else return a special escape */
|
||||
if (i > 0) c = (uint32_t)i; else /* Positive is a data character */
|
||||
{
|
||||
escape = -i; /* Else return a special escape */
|
||||
if (escape == ESC_P || escape == ESC_p || escape == ESC_X)
|
||||
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
|
||||
}
|
||||
}
|
||||
|
||||
/* Escapes that need further processing, including those that are unknown. */
|
||||
|
|
|
@ -525,6 +525,7 @@ bytes in a code unit in that mode. */
|
|||
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
|
||||
#define PCRE2_DEREF_TABLES 0x00040000 /* Release character tables. */
|
||||
#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */
|
||||
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
|
||||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
|
|
|
@ -4493,6 +4493,20 @@ if (TEST(compiled_code, ==, NULL))
|
|||
return PR_SKIP;
|
||||
}
|
||||
|
||||
/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
|
||||
locked out at compile time, but we must also check for occurrences of \P, \p,
|
||||
and \X, which are only supported when Unicode is supported. */
|
||||
|
||||
if (forbid_utf != 0)
|
||||
{
|
||||
if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
|
||||
{
|
||||
fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
|
||||
"#forbid_utf command\n");
|
||||
return PR_SKIP;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remember the maximum lookbehind, for partial matching. */
|
||||
|
||||
if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
|
||||
|
|
Loading…
Reference in New Issue