Save extra compile options with the compiled pattern, and add an info call to
retrieve them.
This commit is contained in:
parent
5cbab74c97
commit
814cc96bc5
|
@ -85,6 +85,10 @@ didn't).
|
|||
20. Allocate a single callout block on the stack at the start of pcre2_match()
|
||||
and set its never-changing fields once only.
|
||||
|
||||
21. Save the extra compile options (set in the compile context) with the
|
||||
compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS
|
||||
to retrieve them, and update pcre2test to show them.
|
||||
|
||||
|
||||
Version 10.30 14-August-2017
|
||||
----------------------------
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_PATTERN_INFO 3 "26 May 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_PATTERN_INFO 3 "16 December 2017" "PCRE2 10.31"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.sp
|
||||
This function returns information about a compiled pattern. Its arguments are:
|
||||
.sp
|
||||
\fIcode\fP Pointer to a compiled regular expression
|
||||
\fIcode\fP Pointer to a compiled regular expression pattern
|
||||
\fIwhat\fP What information is required
|
||||
\fIwhere\fP Where to put the information
|
||||
.sp
|
||||
|
@ -32,6 +32,8 @@ request are as follows:
|
|||
.\" JOIN
|
||||
PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set,
|
||||
otherwise PCRE2_ERROR_UNSET
|
||||
PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the
|
||||
compile context
|
||||
PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL
|
||||
PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information
|
||||
0 nothing set
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "14 November 2017" "PCRE2 10.31"
|
||||
.TH PCRE2API 3 "16 December 2017" "PCRE2 10.31"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1904,12 +1904,15 @@ are as follows:
|
|||
.sp
|
||||
PCRE2_INFO_ALLOPTIONS
|
||||
PCRE2_INFO_ARGOPTIONS
|
||||
PCRE2_INFO_EXTRAOPTIONS
|
||||
.sp
|
||||
Return a copy of the pattern's options. The third argument should point to a
|
||||
Return copies of the pattern's options. The third argument should point to a
|
||||
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
|
||||
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
|
||||
the compile options as modified by any top-level (*XXX) option settings such as
|
||||
(*UTF) at the start of the pattern itself.
|
||||
(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the
|
||||
extra options that were set in the compile context by calling the
|
||||
\fBpcre2_set_compile_extra_options()\fP function.
|
||||
.P
|
||||
For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED
|
||||
option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF.
|
||||
|
@ -3597,6 +3600,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 22 October 2017
|
||||
Last updated: 16 December 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -418,6 +418,7 @@ released, the numbers must not be changed. */
|
|||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
|
|
@ -418,6 +418,7 @@ released, the numbers must not be changed. */
|
|||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
#define PCRE2_INFO_EXTRAOPTIONS 26
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
|
|
@ -9485,6 +9485,7 @@ re->blocksize = re_blocksize;
|
|||
re->magic_number = MAGIC_NUMBER;
|
||||
re->compile_options = options;
|
||||
re->overall_options = cb.external_options;
|
||||
re->extra_options = ccontext->extra_options;
|
||||
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
|
||||
re->limit_heap = limit_heap;
|
||||
re->limit_match = limit_match;
|
||||
|
|
|
@ -623,6 +623,7 @@ typedef struct pcre2_real_code {
|
|||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
uint32_t extra_options; /* Taken from compile_context */
|
||||
uint32_t flags; /* Various state flags */
|
||||
uint32_t limit_heap; /* Limit set in the pattern */
|
||||
uint32_t limit_match; /* Limit set in the pattern */
|
||||
|
|
|
@ -76,6 +76,7 @@ if (where == NULL) /* Requests field length */
|
|||
case PCRE2_INFO_BSR:
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
|
@ -144,6 +145,10 @@ switch(what)
|
|||
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_EXTRAOPTIONS:
|
||||
*((uint32_t *)where) = re->extra_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
|
||||
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
|
||||
|
|
|
@ -4073,8 +4073,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%
|
|||
* Show compile extra options *
|
||||
*************************************************/
|
||||
|
||||
/* Called only for unsupported POSIX options at present, and therefore needed
|
||||
only when the 8-bit library is being compiled.
|
||||
/* Called from show_pattern_info() and for unsupported POSIX options.
|
||||
|
||||
Arguments:
|
||||
options an options word
|
||||
|
@ -4084,7 +4083,6 @@ Arguments:
|
|||
Returns: nothing
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
static void
|
||||
show_compile_extra_options(uint32_t options, const char *before,
|
||||
const char *after)
|
||||
|
@ -4098,7 +4096,6 @@ else fprintf(outfile, "%s%s%s%s%s%s",
|
|||
((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
|
||||
after);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -4272,7 +4269,7 @@ Returns: PR_OK continue processing next line
|
|||
static int
|
||||
show_pattern_info(void)
|
||||
{
|
||||
uint32_t compile_options, overall_options;
|
||||
uint32_t compile_options, overall_options, extra_options;
|
||||
|
||||
if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
|
||||
{
|
||||
|
@ -4412,6 +4409,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
|
||||
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
|
||||
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
|
||||
pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
|
||||
|
||||
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
|
||||
cluttering up the verification output of non-UTF test files. */
|
||||
|
@ -4439,6 +4437,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
}
|
||||
}
|
||||
|
||||
if (extra_options != 0)
|
||||
show_compile_extra_options(extra_options, "Extra options:", "\n");
|
||||
|
||||
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
|
||||
|
||||
if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
|
||||
|
|
|
@ -461,7 +461,7 @@
|
|||
# A special extra option allows escaped surrogate code points in 8-bit mode,
|
||||
# but subjects containing them must not be UTF-checked.
|
||||
|
||||
/\x{d800}/utf,allow_surrogate_escapes
|
||||
/\x{d800}/I,utf,allow_surrogate_escapes
|
||||
\x{d800}\=no_utf_check
|
||||
|
||||
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||
|
|
|
@ -367,7 +367,7 @@
|
|||
# but subjects containing them must not be UTF-checked. These patterns give
|
||||
# errors in 16-bit mode.
|
||||
|
||||
/\x{d800}/utf,allow_surrogate_escapes
|
||||
/\x{d800}/I,utf,allow_surrogate_escapes
|
||||
\x{d800}\=no_utf_check
|
||||
|
||||
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||
|
|
|
@ -5287,7 +5287,7 @@ a)"xI
|
|||
|
||||
/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal
|
||||
|
||||
/\N{\c/B,bad_escape_is_literal
|
||||
/\N{\c/IB,bad_escape_is_literal
|
||||
|
||||
/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
|
||||
|
||||
|
@ -5330,14 +5330,14 @@ a)"xI
|
|||
/(*CR)abc/literal
|
||||
(*CR)abc
|
||||
|
||||
/cat|dog/match_word
|
||||
/cat|dog/I,match_word
|
||||
the cat sat
|
||||
\= Expect no match
|
||||
caterpillar
|
||||
snowcat
|
||||
syndicate
|
||||
|
||||
/(cat)|dog/match_line,literal
|
||||
/(cat)|dog/I,match_line,literal
|
||||
(cat)|dog
|
||||
\= Expect no match
|
||||
the cat sat
|
||||
|
|
|
@ -1578,7 +1578,13 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
|
|||
# A special extra option allows escaped surrogate code points in 8-bit mode,
|
||||
# but subjects containing them must not be UTF-checked.
|
||||
|
||||
/\x{d800}/utf,allow_surrogate_escapes
|
||||
/\x{d800}/I,utf,allow_surrogate_escapes
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
Extra options: allow_surrogate_escapes
|
||||
First code unit = \xed
|
||||
Last code unit = \x80
|
||||
Subject length lower bound = 1
|
||||
\x{d800}\=no_utf_check
|
||||
0: \x{d800}
|
||||
|
||||
|
|
|
@ -1425,7 +1425,7 @@ No match
|
|||
# but subjects containing them must not be UTF-checked. These patterns give
|
||||
# errors in 16-bit mode.
|
||||
|
||||
/\x{d800}/utf,allow_surrogate_escapes
|
||||
/\x{d800}/I,utf,allow_surrogate_escapes
|
||||
Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
|
||||
\x{d800}\=no_utf_check
|
||||
|
||||
|
|
|
@ -1417,7 +1417,12 @@ No match
|
|||
# but subjects containing them must not be UTF-checked. These patterns give
|
||||
# errors in 16-bit mode.
|
||||
|
||||
/\x{d800}/utf,allow_surrogate_escapes
|
||||
/\x{d800}/I,utf,allow_surrogate_escapes
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
Extra options: allow_surrogate_escapes
|
||||
First code unit = \x{d800}
|
||||
Subject length lower bound = 1
|
||||
\x{d800}\=no_utf_check
|
||||
0: \x{d800}
|
||||
|
||||
|
|
|
@ -16180,13 +16180,18 @@ Subject length lower bound = 1
|
|||
/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal
|
||||
** Unrecognized modifier '\' in '\bad_escape_is_literal'
|
||||
|
||||
/\N{\c/B,bad_escape_is_literal
|
||||
/\N{\c/IB,bad_escape_is_literal
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
N{c
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Extra options: bad_escape_is_literal
|
||||
First code unit = 'N'
|
||||
Last code unit = 'c'
|
||||
Subject length lower bound = 3
|
||||
|
||||
/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
|
||||
------------------------------------------------------------------
|
||||
|
@ -16269,7 +16274,12 @@ Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL
|
|||
(*CR)abc
|
||||
0: (*CR)abc
|
||||
|
||||
/cat|dog/match_word
|
||||
/cat|dog/I,match_word
|
||||
Capturing subpattern count = 0
|
||||
Max lookbehind = 1
|
||||
Extra options: match_word
|
||||
Starting code units: c d
|
||||
Subject length lower bound = 3
|
||||
the cat sat
|
||||
0: cat
|
||||
\= Expect no match
|
||||
|
@ -16280,7 +16290,13 @@ No match
|
|||
syndicate
|
||||
No match
|
||||
|
||||
/(cat)|dog/match_line,literal
|
||||
/(cat)|dog/I,match_line,literal
|
||||
Capturing subpattern count = 0
|
||||
Compile options: literal
|
||||
Overall options: anchored literal
|
||||
Extra options: match_line
|
||||
First code unit = '('
|
||||
Subject length lower bound = 9
|
||||
(cat)|dog
|
||||
0: (cat)|dog
|
||||
\= Expect no match
|
||||
|
|
Loading…
Reference in New Issue