Save extra compile options with the compiled pattern, and add an info call to retrieve them.
This commit is contained in:
Philip.Hazel 2017-12-16 17:49:26 +00:00
parent 5cbab74c97
commit 814cc96bc5
16 changed files with 68 additions and 22 deletions

View File

@ -85,6 +85,10 @@ didn't).
20. Allocate a single callout block on the stack at the start of pcre2_match()
and set its never-changing fields once only.
21. Save the extra compile options (set in the compile context) with the
compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS
to retrieve them, and update pcre2test to show them.
Version 10.30 14-August-2017
----------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2_PATTERN_INFO 3 "26 May 2017" "PCRE2 10.30"
.TH PCRE2_PATTERN_INFO 3 "16 December 2017" "PCRE2 10.31"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp
This function returns information about a compiled pattern. Its arguments are:
.sp
\fIcode\fP Pointer to a compiled regular expression
\fIcode\fP Pointer to a compiled regular expression pattern
\fIwhat\fP What information is required
\fIwhere\fP Where to put the information
.sp
@ -32,6 +32,8 @@ request are as follows:
.\" JOIN
PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set,
otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the
compile context
PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL
PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information
0 nothing set

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "14 November 2017" "PCRE2 10.31"
.TH PCRE2API 3 "16 December 2017" "PCRE2 10.31"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -1904,12 +1904,15 @@ are as follows:
.sp
PCRE2_INFO_ALLOPTIONS
PCRE2_INFO_ARGOPTIONS
PCRE2_INFO_EXTRAOPTIONS
.sp
Return a copy of the pattern's options. The third argument should point to a
Return copies of the pattern's options. The third argument should point to a
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
the compile options as modified by any top-level (*XXX) option settings such as
(*UTF) at the start of the pattern itself.
(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the
extra options that were set in the compile context by calling the
\fBpcre2_set_compile_extra_options()\fP function.
.P
For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED
option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF.
@ -3597,6 +3600,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 22 October 2017
Last updated: 16 December 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -418,6 +418,7 @@ released, the numbers must not be changed. */
#define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
#define PCRE2_INFO_EXTRAOPTIONS 26
/* Request types for pcre2_config(). */

View File

@ -418,6 +418,7 @@ released, the numbers must not be changed. */
#define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
#define PCRE2_INFO_EXTRAOPTIONS 26
/* Request types for pcre2_config(). */

View File

@ -9485,6 +9485,7 @@ re->blocksize = re_blocksize;
re->magic_number = MAGIC_NUMBER;
re->compile_options = options;
re->overall_options = cb.external_options;
re->extra_options = ccontext->extra_options;
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
re->limit_heap = limit_heap;
re->limit_match = limit_match;

View File

@ -623,6 +623,7 @@ typedef struct pcre2_real_code {
uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
uint32_t extra_options; /* Taken from compile_context */
uint32_t flags; /* Various state flags */
uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */

View File

@ -76,6 +76,7 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_BSR:
case PCRE2_INFO_CAPTURECOUNT:
case PCRE2_INFO_DEPTHLIMIT:
case PCRE2_INFO_EXTRAOPTIONS:
case PCRE2_INFO_FIRSTCODETYPE:
case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC:
@ -144,6 +145,10 @@ switch(what)
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_EXTRAOPTIONS:
*((uint32_t *)where) = re->extra_options;
break;
case PCRE2_INFO_FIRSTCODETYPE:
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;

View File

@ -4073,8 +4073,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%
* Show compile extra options *
*************************************************/
/* Called only for unsupported POSIX options at present, and therefore needed
only when the 8-bit library is being compiled.
/* Called from show_pattern_info() and for unsupported POSIX options.
Arguments:
options an options word
@ -4084,7 +4083,6 @@ Arguments:
Returns: nothing
*/
#ifdef SUPPORT_PCRE2_8
static void
show_compile_extra_options(uint32_t options, const char *before,
const char *after)
@ -4098,7 +4096,6 @@ else fprintf(outfile, "%s%s%s%s%s%s",
((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
after);
}
#endif
@ -4272,7 +4269,7 @@ Returns: PR_OK continue processing next line
static int
show_pattern_info(void)
{
uint32_t compile_options, overall_options;
uint32_t compile_options, overall_options, extra_options;
if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
{
@ -4412,6 +4409,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
cluttering up the verification output of non-UTF test files. */
@ -4439,6 +4437,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
}
}
if (extra_options != 0)
show_compile_extra_options(extra_options, "Extra options:", "\n");
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||

View File

@ -461,7 +461,7 @@
# A special extra option allows excaped surrogate code points in 8-bit mode,
# but subjects containing them must not be UTF-checked.
/\x{d800}/utf,allow_surrogate_escapes
/\x{d800}/I,utf,allow_surrogate_escapes
\x{d800}\=no_utf_check
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes

View File

@ -367,7 +367,7 @@
# but subjects containing them must not be UTF-checked. These patterns give
# errors in 16-bit mode.
/\x{d800}/utf,allow_surrogate_escapes
/\x{d800}/I,utf,allow_surrogate_escapes
\x{d800}\=no_utf_check
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes

6
testdata/testinput2 vendored
View File

@ -5287,7 +5287,7 @@ a)"xI
/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal
/\N{\c/B,bad_escape_is_literal
/\N{\c/IB,bad_escape_is_literal
/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
@ -5330,14 +5330,14 @@ a)"xI
/(*CR)abc/literal
(*CR)abc
/cat|dog/match_word
/cat|dog/I,match_word
the cat sat
\= Expect no match
caterpillar
snowcat
syndicate
/(cat)|dog/match_line,literal
/(cat)|dog/I,match_line,literal
(cat)|dog
\= Expect no match
the cat sat

View File

@ -1578,7 +1578,13 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
# A special extra option allows excaped surrogate code points in 8-bit mode,
# but subjects containing them must not be UTF-checked.
/\x{d800}/utf,allow_surrogate_escapes
/\x{d800}/I,utf,allow_surrogate_escapes
Capturing subpattern count = 0
Options: utf
Extra options: allow_surrogate_escapes
First code unit = \xed
Last code unit = \x80
Subject length lower bound = 1
\x{d800}\=no_utf_check
0: \x{d800}

View File

@ -1425,7 +1425,7 @@ No match
# but subjects containing them must not be UTF-checked. These patterns give
# errors in 16-bit mode.
/\x{d800}/utf,allow_surrogate_escapes
/\x{d800}/I,utf,allow_surrogate_escapes
Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
\x{d800}\=no_utf_check

View File

@ -1417,7 +1417,12 @@ No match
# but subjects containing them must not be UTF-checked. These patterns give
# errors in 16-bit mode.
/\x{d800}/utf,allow_surrogate_escapes
/\x{d800}/I,utf,allow_surrogate_escapes
Capturing subpattern count = 0
Options: utf
Extra options: allow_surrogate_escapes
First code unit = \x{d800}
Subject length lower bound = 1
\x{d800}\=no_utf_check
0: \x{d800}

22
testdata/testoutput2 vendored
View File

@ -16180,13 +16180,18 @@ Subject length lower bound = 1
/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal
** Unrecognized modifier '\' in '\bad_escape_is_literal'
/\N{\c/B,bad_escape_is_literal
/\N{\c/IB,bad_escape_is_literal
------------------------------------------------------------------
Bra
N{c
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Extra options: bad_escape_is_literal
First code unit = 'N'
Last code unit = 'c'
Subject length lower bound = 3
/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
------------------------------------------------------------------
@ -16269,7 +16274,12 @@ Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL
(*CR)abc
0: (*CR)abc
/cat|dog/match_word
/cat|dog/I,match_word
Capturing subpattern count = 0
Max lookbehind = 1
Extra options: match_word
Starting code units: c d
Subject length lower bound = 3
the cat sat
0: cat
\= Expect no match
@ -16280,7 +16290,13 @@ No match
syndicate
No match
/(cat)|dog/match_line,literal
/(cat)|dog/I,match_line,literal
Capturing subpattern count = 0
Compile options: literal
Overall options: anchored literal
Extra options: match_line
First code unit = '('
Subject length lower bound = 9
(cat)|dog
0: (cat)|dog
\= Expect no match