Save extra compile options with the compiled pattern, and add an info call to retrieve them.
This commit is contained in:
Philip.Hazel 2017-12-16 17:49:26 +00:00
parent 5cbab74c97
commit 814cc96bc5
16 changed files with 68 additions and 22 deletions

View File

@ -85,6 +85,10 @@ didn't).
20. Allocate a single callout block on the stack at the start of pcre2_match() 20. Allocate a single callout block on the stack at the start of pcre2_match()
and set its never-changing fields once only. and set its never-changing fields once only.
21. Save the extra compile options (set in the compile context) with the
compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS
to retrieve them, and update pcre2test to show them.
Version 10.30 14-August-2017 Version 10.30 14-August-2017
---------------------------- ----------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2_PATTERN_INFO 3 "26 May 2017" "PCRE2 10.30" .TH PCRE2_PATTERN_INFO 3 "16 December 2017" "PCRE2 10.31"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS .SH SYNOPSIS
@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp .sp
This function returns information about a compiled pattern. Its arguments are: This function returns information about a compiled pattern. Its arguments are:
.sp .sp
\fIcode\fP Pointer to a compiled regular expression \fIcode\fP Pointer to a compiled regular expression pattern
\fIwhat\fP What information is required \fIwhat\fP What information is required
\fIwhere\fP Where to put the information \fIwhere\fP Where to put the information
.sp .sp
@ -32,6 +32,8 @@ request are as follows:
.\" JOIN .\" JOIN
PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set, PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set,
otherwise PCRE2_ERROR_UNSET otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the
compile context
PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL
PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information
0 nothing set 0 nothing set

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "14 November 2017" "PCRE2 10.31" .TH PCRE2API 3 "16 December 2017" "PCRE2 10.31"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.sp .sp
@ -1904,12 +1904,15 @@ are as follows:
.sp .sp
PCRE2_INFO_ALLOPTIONS PCRE2_INFO_ALLOPTIONS
PCRE2_INFO_ARGOPTIONS PCRE2_INFO_ARGOPTIONS
PCRE2_INFO_EXTRAOPTIONS
.sp .sp
Return a copy of the pattern's options. The third argument should point to a Return copies of the pattern's options. The third argument should point to a
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that \fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
the compile options as modified by any top-level (*XXX) option settings such as the compile options as modified by any top-level (*XXX) option settings such as
(*UTF) at the start of the pattern itself. (*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the
extra options that were set in the compile context by calling the
\fBpcre2_set_compile_extra_options()\fP function.
.P .P
For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED
option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF.
@ -3597,6 +3600,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 22 October 2017 Last updated: 16 December 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -418,6 +418,7 @@ released, the numbers must not be changed. */
#define PCRE2_INFO_HASBACKSLASHC 23 #define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24 #define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25 #define PCRE2_INFO_HEAPLIMIT 25
#define PCRE2_INFO_EXTRAOPTIONS 26
/* Request types for pcre2_config(). */ /* Request types for pcre2_config(). */

View File

@ -418,6 +418,7 @@ released, the numbers must not be changed. */
#define PCRE2_INFO_HASBACKSLASHC 23 #define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24 #define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25 #define PCRE2_INFO_HEAPLIMIT 25
#define PCRE2_INFO_EXTRAOPTIONS 26
/* Request types for pcre2_config(). */ /* Request types for pcre2_config(). */

View File

@ -9485,6 +9485,7 @@ re->blocksize = re_blocksize;
re->magic_number = MAGIC_NUMBER; re->magic_number = MAGIC_NUMBER;
re->compile_options = options; re->compile_options = options;
re->overall_options = cb.external_options; re->overall_options = cb.external_options;
re->extra_options = ccontext->extra_options;
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
re->limit_heap = limit_heap; re->limit_heap = limit_heap;
re->limit_match = limit_match; re->limit_match = limit_match;

View File

@ -623,6 +623,7 @@ typedef struct pcre2_real_code {
uint32_t magic_number; /* Paranoid and endianness check */ uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */ uint32_t overall_options; /* Options after processing the pattern */
uint32_t extra_options; /* Taken from compile_context */
uint32_t flags; /* Various state flags */ uint32_t flags; /* Various state flags */
uint32_t limit_heap; /* Limit set in the pattern */ uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */ uint32_t limit_match; /* Limit set in the pattern */

View File

@ -76,6 +76,7 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_BSR: case PCRE2_INFO_BSR:
case PCRE2_INFO_CAPTURECOUNT: case PCRE2_INFO_CAPTURECOUNT:
case PCRE2_INFO_DEPTHLIMIT: case PCRE2_INFO_DEPTHLIMIT:
case PCRE2_INFO_EXTRAOPTIONS:
case PCRE2_INFO_FIRSTCODETYPE: case PCRE2_INFO_FIRSTCODETYPE:
case PCRE2_INFO_FIRSTCODEUNIT: case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC: case PCRE2_INFO_HASBACKSLASHC:
@ -144,6 +145,10 @@ switch(what)
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET; if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
break; break;
case PCRE2_INFO_EXTRAOPTIONS:
*((uint32_t *)where) = re->extra_options;
break;
case PCRE2_INFO_FIRSTCODETYPE: case PCRE2_INFO_FIRSTCODETYPE:
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 : *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0; ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;

View File

@ -4073,8 +4073,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%
* Show compile extra options * * Show compile extra options *
*************************************************/ *************************************************/
/* Called only for unsupported POSIX options at present, and therefore needed /* Called from show_pattern_info() and for unsupported POSIX options.
only when the 8-bit library is being compiled.
Arguments: Arguments:
options an options word options an options word
@ -4084,7 +4083,6 @@ Arguments:
Returns: nothing Returns: nothing
*/ */
#ifdef SUPPORT_PCRE2_8
static void static void
show_compile_extra_options(uint32_t options, const char *before, show_compile_extra_options(uint32_t options, const char *before,
const char *after) const char *after)
@ -4098,7 +4096,6 @@ else fprintf(outfile, "%s%s%s%s%s%s",
((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
after); after);
} }
#endif
@ -4272,7 +4269,7 @@ Returns: PR_OK continue processing next line
static int static int
show_pattern_info(void) show_pattern_info(void)
{ {
uint32_t compile_options, overall_options; uint32_t compile_options, overall_options, extra_options;
if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0) if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
{ {
@ -4412,6 +4409,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
cluttering up the verification output of non-UTF test files. */ cluttering up the verification output of non-UTF test files. */
@ -4438,6 +4436,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
show_compile_options(overall_options, "Overall options:", "\n"); show_compile_options(overall_options, "Overall options:", "\n");
} }
} }
if (extra_options != 0)
show_compile_extra_options(extra_options, "Extra options:", "\n");
if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); if (jchanged) fprintf(outfile, "Duplicate name status changes\n");

View File

@ -461,7 +461,7 @@
# A special extra option allows excaped surrogate code points in 8-bit mode, # A special extra option allows excaped surrogate code points in 8-bit mode,
# but subjects containing them must not be UTF-checked. # but subjects containing them must not be UTF-checked.
/\x{d800}/utf,allow_surrogate_escapes /\x{d800}/I,utf,allow_surrogate_escapes
\x{d800}\=no_utf_check \x{d800}\=no_utf_check
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes

View File

@ -367,7 +367,7 @@
# but subjects containing them must not be UTF-checked. These patterns give # but subjects containing them must not be UTF-checked. These patterns give
# errors in 16-bit mode. # errors in 16-bit mode.
/\x{d800}/utf,allow_surrogate_escapes /\x{d800}/I,utf,allow_surrogate_escapes
\x{d800}\=no_utf_check \x{d800}\=no_utf_check
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes

6
testdata/testinput2 vendored
View File

@ -5287,7 +5287,7 @@ a)"xI
/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal /\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal
/\N{\c/B,bad_escape_is_literal /\N{\c/IB,bad_escape_is_literal
/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal /[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
@ -5330,14 +5330,14 @@ a)"xI
/(*CR)abc/literal /(*CR)abc/literal
(*CR)abc (*CR)abc
/cat|dog/match_word /cat|dog/I,match_word
the cat sat the cat sat
\= Expect no match \= Expect no match
caterpillar caterpillar
snowcat snowcat
syndicate syndicate
/(cat)|dog/match_line,literal /(cat)|dog/I,match_line,literal
(cat)|dog (cat)|dog
\= Expect no match \= Expect no match
the cat sat the cat sat

View File

@ -1578,7 +1578,13 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
# A special extra option allows excaped surrogate code points in 8-bit mode, # A special extra option allows excaped surrogate code points in 8-bit mode,
# but subjects containing them must not be UTF-checked. # but subjects containing them must not be UTF-checked.
/\x{d800}/utf,allow_surrogate_escapes /\x{d800}/I,utf,allow_surrogate_escapes
Capturing subpattern count = 0
Options: utf
Extra options: allow_surrogate_escapes
First code unit = \xed
Last code unit = \x80
Subject length lower bound = 1
\x{d800}\=no_utf_check \x{d800}\=no_utf_check
0: \x{d800} 0: \x{d800}

View File

@ -1425,7 +1425,7 @@ No match
# but subjects containing them must not be UTF-checked. These patterns give # but subjects containing them must not be UTF-checked. These patterns give
# errors in 16-bit mode. # errors in 16-bit mode.
/\x{d800}/utf,allow_surrogate_escapes /\x{d800}/I,utf,allow_surrogate_escapes
Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
\x{d800}\=no_utf_check \x{d800}\=no_utf_check

View File

@ -1417,7 +1417,12 @@ No match
# but subjects containing them must not be UTF-checked. These patterns give # but subjects containing them must not be UTF-checked. These patterns give
# errors in 16-bit mode. # errors in 16-bit mode.
/\x{d800}/utf,allow_surrogate_escapes /\x{d800}/I,utf,allow_surrogate_escapes
Capturing subpattern count = 0
Options: utf
Extra options: allow_surrogate_escapes
First code unit = \x{d800}
Subject length lower bound = 1
\x{d800}\=no_utf_check \x{d800}\=no_utf_check
0: \x{d800} 0: \x{d800}

22
testdata/testoutput2 vendored
View File

@ -16180,13 +16180,18 @@ Subject length lower bound = 1
/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal /\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal
** Unrecognized modifier '\' in '\bad_escape_is_literal' ** Unrecognized modifier '\' in '\bad_escape_is_literal'
/\N{\c/B,bad_escape_is_literal /\N{\c/IB,bad_escape_is_literal
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
N{c N{c
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
Capturing subpattern count = 0
Extra options: bad_escape_is_literal
First code unit = 'N'
Last code unit = 'c'
Subject length lower bound = 3
/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal /[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
------------------------------------------------------------------ ------------------------------------------------------------------
@ -16269,7 +16274,12 @@ Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL
(*CR)abc (*CR)abc
0: (*CR)abc 0: (*CR)abc
/cat|dog/match_word /cat|dog/I,match_word
Capturing subpattern count = 0
Max lookbehind = 1
Extra options: match_word
Starting code units: c d
Subject length lower bound = 3
the cat sat the cat sat
0: cat 0: cat
\= Expect no match \= Expect no match
@ -16280,7 +16290,13 @@ No match
syndicate syndicate
No match No match
/(cat)|dog/match_line,literal /(cat)|dog/I,match_line,literal
Capturing subpattern count = 0
Compile options: literal
Overall options: anchored literal
Extra options: match_line
First code unit = '('
Subject length lower bound = 9
(cat)|dog (cat)|dog
0: (cat)|dog 0: (cat)|dog
\= Expect no match \= Expect no match