diff --git a/ChangeLog b/ChangeLog index 161b4e0..da9e3ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -134,6 +134,9 @@ provoke a buffer overflow. This bug was discovered by the LLVM fuzzer. 34. Give an error for an empty subpattern name such as (?''). +35. Make pcre2test give an error if a pattern that follows #forbid_utf contains +\P, \p, or \X. + Version 10.10 06-March-2015 --------------------------- diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 1a5741e..fa47792 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "22 April 2015" "PCRE 10.20" +.TH PCRE2TEST 1 "20 May 2015" "PCRE 10.20" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -237,13 +237,19 @@ following commands are recognized: #forbid_utf .sp Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP -options set, which locks out the use of UTF and Unicode property features. This -is a trigger guard that is used in test files to ensure that UTF or Unicode -property tests are not accidentally added to files that are used when Unicode -support is not included in the library. This effect can also be obtained by the -use of \fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be -unset, and the automatic options are not displayed in pattern information, to -avoid cluttering up test output. +options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and +the use of (*UTF) and (*UCP) at the start of patterns. This command also forces +an error if a subsequent pattern contains any occurrences of \eP, \ep, or \eX, +which are still supported when PCRE2_UTF is not set, but which require Unicode +property support to be included in the library. +.P +This is a trigger guard that is used in test files to ensure that UTF or +Unicode property tests are not accidentally added to files that are used when +Unicode support is not included in the library. 
Setting PCRE2_NEVER_UTF and +PCRE2_NEVER_UCP as a default can also be achieved by the use of \fB#pattern\fP; +the difference is that \fB#forbid_utf\fP cannot be unset, and the automatic +options are not displayed in pattern information, to avoid cluttering up test +output. .sp #load .sp @@ -1445,6 +1451,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 22 April 2015 +Last updated: 20 May 2015 Copyright (c) 1997-2015 University of Cambridge. .fi diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 7719d88..992e573 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1729,8 +1729,12 @@ else if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) { - if (i > 0) c = (uint32_t)i; /* Positive is a data character */ - else escape = -i; /* Else return a special escape */ + if (i > 0) c = (uint32_t)i; else /* Positive is a data character */ + { + escape = -i; /* Else return a special escape */ + if (escape == ESC_P || escape == ESC_p || escape == ESC_X) + cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */ + } } /* Escapes that need further processing, including those that are unknown. */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index f288f39..e2a9252 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -525,6 +525,7 @@ bytes in a code unit in that mode. */ #define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */ #define PCRE2_DEREF_TABLES 0x00040000 /* Release character tables. 
*/ #define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */ +#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) diff --git a/src/pcre2test.c b/src/pcre2test.c index 1162244..97ba5bb 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -4492,6 +4492,20 @@ if (TEST(compiled_code, ==, NULL)) fprintf(outfile, "\n"); return PR_SKIP; } + +/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are +locked out at compile time, but we must also check for occurrences of \P, \p, +and \X, which are only supported when Unicode is supported. */ + +if (forbid_utf != 0) + { + if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0) + { + fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the " + "#forbid_utf command\n"); + return PR_SKIP; + } + } /* Remember the maximum lookbehind, for partial matching. */