From 814cc96bc58ba5566741abe9300c72c6f9201278 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sat, 16 Dec 2017 17:49:26 +0000 Subject: [PATCH] Save extra compile options with the compiled pattern, and add an info call to retrieve them. --- ChangeLog | 4 ++++ doc/pcre2_pattern_info.3 | 6 ++++-- doc/pcre2api.3 | 11 +++++++---- src/pcre2.h | 1 + src/pcre2.h.in | 1 + src/pcre2_compile.c | 1 + src/pcre2_intmodedep.h | 1 + src/pcre2_pattern_info.c | 5 +++++ src/pcre2test.c | 11 ++++++----- testdata/testinput10 | 2 +- testdata/testinput12 | 2 +- testdata/testinput2 | 6 +++--- testdata/testoutput10 | 8 +++++++- testdata/testoutput12-16 | 2 +- testdata/testoutput12-32 | 7 ++++++- testdata/testoutput2 | 22 +++++++++++++++++++--- 16 files changed, 68 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index b0822af..3c4442d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -85,6 +85,10 @@ didn't). 20. Allocate a single callout block on the stack at the start of pcre2_match() and set its never-changing fields once only. +21. Save the extra compile options (set in the compile context) with the +compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS +to retrieve them, and update pcre2test to show them. + Version 10.30 14-August-2017 ---------------------------- diff --git a/doc/pcre2_pattern_info.3 b/doc/pcre2_pattern_info.3 index 256e386..64bfc45 100644 --- a/doc/pcre2_pattern_info.3 +++ b/doc/pcre2_pattern_info.3 @@ -1,4 +1,4 @@ -.TH PCRE2_PATTERN_INFO 3 "26 May 2017" "PCRE2 10.30" +.TH PCRE2_PATTERN_INFO 3 "16 December 2017" "PCRE2 10.31" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API) .sp This function returns information about a compiled pattern. 
Its arguments are: .sp - \fIcode\fP Pointer to a compiled regular expression + \fIcode\fP Pointer to a compiled regular expression pattern \fIwhat\fP What information is required \fIwhere\fP Where to put the information .sp @@ -32,6 +32,8 @@ request are as follows: .\" JOIN PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set, otherwise PCRE2_ERROR_UNSET + PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the + compile context PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information 0 nothing set diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index dfc40b5..6925df1 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "14 November 2017" "PCRE2 10.31" +.TH PCRE2API 3 "16 December 2017" "PCRE2 10.31" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1904,12 +1904,15 @@ are as follows: .sp PCRE2_INFO_ALLOPTIONS PCRE2_INFO_ARGOPTIONS + PCRE2_INFO_EXTRAOPTIONS .sp -Return a copy of the pattern's options. The third argument should point to a +Return copies of the pattern's options. The third argument should point to a \fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns the compile options as modified by any top-level (*XXX) option settings such as -(*UTF) at the start of the pattern itself. +(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the +extra options that were set in the compile context by calling the +\fBpcre2_set_compile_extra_options()\fP function. .P For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. @@ -3597,6 +3600,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 22 October 2017 +Last updated: 16 December 2017 Copyright (c) 1997-2017 University of Cambridge. 
.fi diff --git a/src/pcre2.h b/src/pcre2.h index 0e93177..bbb1771 100644 --- a/src/pcre2.h +++ b/src/pcre2.h @@ -418,6 +418,7 @@ released, the numbers must not be changed. */ #define PCRE2_INFO_HASBACKSLASHC 23 #define PCRE2_INFO_FRAMESIZE 24 #define PCRE2_INFO_HEAPLIMIT 25 +#define PCRE2_INFO_EXTRAOPTIONS 26 /* Request types for pcre2_config(). */ diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 7612c55..6718689 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -418,6 +418,7 @@ released, the numbers must not be changed. */ #define PCRE2_INFO_HASBACKSLASHC 23 #define PCRE2_INFO_FRAMESIZE 24 #define PCRE2_INFO_HEAPLIMIT 25 +#define PCRE2_INFO_EXTRAOPTIONS 26 /* Request types for pcre2_config(). */ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 1e06040..87530fb 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -9485,6 +9485,7 @@ re->blocksize = re_blocksize; re->magic_number = MAGIC_NUMBER; re->compile_options = options; re->overall_options = cb.external_options; +re->extra_options = ccontext->extra_options; re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; re->limit_heap = limit_heap; re->limit_match = limit_match; diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 4c7fe78..ed97a5b 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -623,6 +623,7 @@ typedef struct pcre2_real_code { uint32_t magic_number; /* Paranoid and endianness check */ uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t overall_options; /* Options after processing the pattern */ + uint32_t extra_options; /* Taken from compile_context */ uint32_t flags; /* Various state flags */ uint32_t limit_heap; /* Limit set in the pattern */ uint32_t limit_match; /* Limit set in the pattern */ diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c index 540707b..1a51a92 100644 --- a/src/pcre2_pattern_info.c +++ b/src/pcre2_pattern_info.c @@ -76,6 +76,7 @@ if (where == NULL) /* Requests field 
length */ case PCRE2_INFO_BSR: case PCRE2_INFO_CAPTURECOUNT: case PCRE2_INFO_DEPTHLIMIT: + case PCRE2_INFO_EXTRAOPTIONS: case PCRE2_INFO_FIRSTCODETYPE: case PCRE2_INFO_FIRSTCODEUNIT: case PCRE2_INFO_HASBACKSLASHC: @@ -144,6 +145,10 @@ switch(what) if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET; break; + case PCRE2_INFO_EXTRAOPTIONS: + *((uint32_t *)where) = re->extra_options; + break; + case PCRE2_INFO_FIRSTCODETYPE: *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 : ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0; diff --git a/src/pcre2test.c b/src/pcre2test.c index 9730bde..e0fead5 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -4073,8 +4073,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s% * Show compile extra options * *************************************************/ -/* Called only for unsupported POSIX options at present, and therefore needed -only when the 8-bit library is being compiled. +/* Called from show_pattern_info() and for unsupported POSIX options. Arguments: options an options word @@ -4084,7 +4083,6 @@ Arguments: Returns: nothing */ -#ifdef SUPPORT_PCRE2_8 static void show_compile_extra_options(uint32_t options, const char *before, const char *after) @@ -4098,7 +4096,6 @@ else fprintf(outfile, "%s%s%s%s%s%s", ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", after); } -#endif @@ -4272,7 +4269,7 @@ Returns: PR_OK continue processing next line static int show_pattern_info(void) { -uint32_t compile_options, overall_options; +uint32_t compile_options, overall_options, extra_options; if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0) { @@ -4412,6 +4409,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); + pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE); /* Remove UTF/UCP if they were there only because of forbid_utf. 
This saves cluttering up the verification output of non-UTF test files. */ @@ -4438,6 +4436,9 @@ if ((pat_patctl.control & CTL_INFO) != 0) show_compile_options(overall_options, "Overall options:", "\n"); } } + + if (extra_options != 0) + show_compile_extra_options(extra_options, "Extra options:", "\n"); if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); diff --git a/testdata/testinput10 b/testdata/testinput10 index 2892b42..93d2560 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -461,7 +461,7 @@ # A special extra option allows excaped surrogate code points in 8-bit mode, # but subjects containing them must not be UTF-checked. -/\x{d800}/utf,allow_surrogate_escapes +/\x{d800}/I,utf,allow_surrogate_escapes \x{d800}\=no_utf_check /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes diff --git a/testdata/testinput12 b/testdata/testinput12 index 09df9fa..b0ab909 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -367,7 +367,7 @@ # but subjects containing them must not be UTF-checked. These patterns give # errors in 16-bit mode. 
-/\x{d800}/utf,allow_surrogate_escapes +/\x{d800}/I,utf,allow_surrogate_escapes \x{d800}\=no_utf_check /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes diff --git a/testdata/testinput2 b/testdata/testinput2 index 695f0a4..d3bdc96 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5287,7 +5287,7 @@ a)"xI /\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal -/\N{\c/B,bad_escape_is_literal +/\N{\c/IB,bad_escape_is_literal /[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal @@ -5330,14 +5330,14 @@ a)"xI /(*CR)abc/literal (*CR)abc -/cat|dog/match_word +/cat|dog/I,match_word the cat sat \= Expect no match caterpillar snowcat syndicate -/(cat)|dog/match_line,literal +/(cat)|dog/I,match_line,literal (cat)|dog \= Expect no match the cat sat diff --git a/testdata/testoutput10 b/testdata/testoutput10 index f6aeeb9..9660fc5 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1578,7 +1578,13 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), # A special extra option allows excaped surrogate code points in 8-bit mode, # but subjects containing them must not be UTF-checked. -/\x{d800}/utf,allow_surrogate_escapes +/\x{d800}/I,utf,allow_surrogate_escapes +Capturing subpattern count = 0 +Options: utf +Extra options: allow_surrogate_escapes +First code unit = \xed +Last code unit = \x80 +Subject length lower bound = 1 \x{d800}\=no_utf_check 0: \x{d800} diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 1078042..52dbe74 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -1425,7 +1425,7 @@ No match # but subjects containing them must not be UTF-checked. These patterns give # errors in 16-bit mode. 
-/\x{d800}/utf,allow_surrogate_escapes +/\x{d800}/I,utf,allow_surrogate_escapes Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode \x{d800}\=no_utf_check diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 25848ea..38ff92d 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1417,7 +1417,12 @@ No match # but subjects containing them must not be UTF-checked. These patterns give # errors in 16-bit mode. -/\x{d800}/utf,allow_surrogate_escapes +/\x{d800}/I,utf,allow_surrogate_escapes +Capturing subpattern count = 0 +Options: utf +Extra options: allow_surrogate_escapes +First code unit = \x{d800} +Subject length lower bound = 1 \x{d800}\=no_utf_check 0: \x{d800} diff --git a/testdata/testoutput2 b/testdata/testoutput2 index ee9cde9..f3b1854 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -16180,13 +16180,18 @@ Subject length lower bound = 1 /\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal ** Unrecognized modifier '\' in '\bad_escape_is_literal' -/\N{\c/B,bad_escape_is_literal +/\N{\c/IB,bad_escape_is_literal ------------------------------------------------------------------ Bra N{c Ket End ------------------------------------------------------------------ +Capturing subpattern count = 0 +Extra options: bad_escape_is_literal +First code unit = 'N' +Last code unit = 'c' +Subject length lower bound = 3 /[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal ------------------------------------------------------------------ @@ -16269,7 +16274,12 @@ Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL (*CR)abc 0: (*CR)abc -/cat|dog/match_word +/cat|dog/I,match_word +Capturing subpattern count = 0 +Max lookbehind = 1 +Extra options: match_word +Starting code units: c d +Subject length lower bound = 3 the cat sat 0: cat \= Expect no match @@ -16280,7 +16290,13 @@ No match syndicate No match -/(cat)|dog/match_line,literal +/(cat)|dog/I,match_line,literal 
+Capturing subpattern count = 0 +Compile options: literal +Overall options: anchored literal +Extra options: match_line +First code unit = '(' +Subject length lower bound = 9 (cat)|dog 0: (cat)|dog \= Expect no match