Make pcre2test give an error for \P, \p, and \X after #forbid_utf.
This commit is contained in:
parent
13576ba188
commit
9ce0207f5c
|
@ -134,6 +134,9 @@ provoke a buffer overflow. This bug was discovered by the LLVM fuzzer.
|
||||||
|
|
||||||
34. Give an error for an empty subpattern name such as (?'').
|
34. Give an error for an empty subpattern name such as (?'').
|
||||||
|
|
||||||
|
35. Make pcre2test give an error if a pattern that follows #forbid_utf contains
|
||||||
|
\P, \p, or \X.
|
||||||
|
|
||||||
|
|
||||||
Version 10.10 06-March-2015
|
Version 10.10 06-March-2015
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "22 April 2015" "PCRE 10.20"
|
.TH PCRE2TEST 1 "20 May 2015" "PCRE 10.20"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -237,13 +237,19 @@ following commands are recognized:
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
.sp
|
.sp
|
||||||
Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
|
Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
|
||||||
options set, which locks out the use of UTF and Unicode property features. This
|
options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and
|
||||||
is a trigger guard that is used in test files to ensure that UTF or Unicode
|
the use of (*UTF) and (*UCP) at the start of patterns. This command also forces
|
||||||
property tests are not accidentally added to files that are used when Unicode
|
an error if a subsequent pattern contains any occurrences of \eP, \ep, or \eX,
|
||||||
support is not included in the library. This effect can also be obtained by the
|
which are still supported when PCRE2_UTF is not set, but which require Unicode
|
||||||
use of \fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be
|
property support to be included in the library.
|
||||||
unset, and the automatic options are not displayed in pattern information, to
|
.P
|
||||||
avoid cluttering up test output.
|
This is a trigger guard that is used in test files to ensure that UTF or
|
||||||
|
Unicode property tests are not accidentally added to files that are used when
|
||||||
|
Unicode support is not included in the library. Setting PCRE2_NEVER_UTF and
|
||||||
|
PCRE2_NEVER_UCP as a default can also be obtained by the use of \fB#pattern\fP;
|
||||||
|
the difference is that \fB#forbid_utf\fP cannot be unset, and the automatic
|
||||||
|
options are not displayed in pattern information, to avoid cluttering up test
|
||||||
|
output.
|
||||||
.sp
|
.sp
|
||||||
#load <filename>
|
#load <filename>
|
||||||
.sp
|
.sp
|
||||||
|
@ -1445,6 +1451,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 22 April 2015
|
Last updated: 20 May 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1729,8 +1729,12 @@ else if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */
|
||||||
|
|
||||||
else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
||||||
{
|
{
|
||||||
if (i > 0) c = (uint32_t)i; /* Positive is a data character */
|
if (i > 0) c = (uint32_t)i; else /* Positive is a data character */
|
||||||
else escape = -i; /* Else return a special escape */
|
{
|
||||||
|
escape = -i; /* Else return a special escape */
|
||||||
|
if (escape == ESC_P || escape == ESC_p || escape == ESC_X)
|
||||||
|
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Escapes that need further processing, including those that are unknown. */
|
/* Escapes that need further processing, including those that are unknown. */
|
||||||
|
|
|
@ -525,6 +525,7 @@ bytes in a code unit in that mode. */
|
||||||
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
|
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
|
||||||
#define PCRE2_DEREF_TABLES 0x00040000 /* Release character tables. */
|
#define PCRE2_DEREF_TABLES 0x00040000 /* Release character tables. */
|
||||||
#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */
|
#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */
|
||||||
|
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
|
||||||
|
|
||||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||||
|
|
||||||
|
|
|
@ -4493,6 +4493,20 @@ if (TEST(compiled_code, ==, NULL))
|
||||||
return PR_SKIP;
|
return PR_SKIP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
|
||||||
|
locked out at compile time, but we must also check for occurrences of \P, \p,
|
||||||
|
and \X, which are only supported when Unicode is supported. */
|
||||||
|
|
||||||
|
if (forbid_utf != 0)
|
||||||
|
{
|
||||||
|
if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
|
||||||
|
{
|
||||||
|
fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
|
||||||
|
"#forbid_utf command\n");
|
||||||
|
return PR_SKIP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Remember the maximum lookbehind, for partial matching. */
|
/* Remember the maximum lookbehind, for partial matching. */
|
||||||
|
|
||||||
if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
|
if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
|
||||||
|
|
Loading…
Reference in New Issue