diff --git a/ChangeLog b/ChangeLog index bf50857..8d00069 100644 --- a/ChangeLog +++ b/ChangeLog @@ -151,6 +151,8 @@ tests to improve coverage. 29. Implemented PCRE2_EXTENDED_MORE and related /xx and (?xx) features. +30. Implemented (?n: for PCRE2_NO_AUTO_CAPTURE, because Perl now has this. + Version 10.23 14-February-2017 ------------------------------ diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 6006f04..e339955 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "17 April 2017" "PCRE2 10.30" +.TH PCRE2API 3 "18 April 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1426,8 +1426,8 @@ PCRE2_NEVER_UTF causes an error. If this option is set, it disables the use of numbered capturing parentheses in the pattern. Any opening parenthesis that is not followed by ? behaves as if it were followed by ?: but named parentheses can still be used for capturing (and -they acquire numbers in the usual way). There is no equivalent of this option -in Perl. Note that, if this option is set, references to capturing groups (back +they acquire numbers in the usual way). This is the same as Perl's /n option. +Note that, when this option is set, references to capturing groups (back references or recursion/subroutine calls) may only refer to named groups, though the reference can be by name or by number. .sp @@ -3402,6 +3402,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 17 April 2017 +Last updated: 18 April 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 index 8978530..5bb4707 100644 --- a/doc/pcre2pattern.3 +++ b/doc/pcre2pattern.3 @@ -1543,12 +1543,13 @@ alternative in the subpattern. .rs .sp The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, -PCRE2_EXTENDED, and PCRE2_EXTENDED_MORE options (which are Perl-compatible) can -be changed from within the pattern by a sequence of Perl option letters -enclosed between "(?" and ")". 
The option letters are +PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options (which +are Perl-compatible) can be changed from within the pattern by a sequence of +Perl option letters enclosed between "(?" and ")". The option letters are .sp i for PCRE2_CASELESS m for PCRE2_MULTILINE + n for PCRE2_NO_AUTO_CAPTURE s for PCRE2_DOTALL x for PCRE2_EXTENDED xx for PCRE2_EXTENDED_MORE diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 index 7901102..1cd503e 100644 --- a/doc/pcre2syntax.3 +++ b/doc/pcre2syntax.3 @@ -407,6 +407,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use (?i) caseless (?J) allow duplicate names (?m) multiline + (?n) no auto capture (?s) single line (dotall) (?U) default ungreedy (lazy) (?x) extended: ignore white space except in classes diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index d177cc4..7dec66b 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "17 April 2017" "PCRE 10.30" +.TH PCRE2TEST 1 "18 April 2017" "PCRE 10.30" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -519,10 +519,11 @@ by a previous \fB#pattern\fP command. .SS "Setting compilation options" .rs .sp -The following modifiers set options for \fBpcre2_compile()\fP. The most common -ones have single-letter abbreviations, with special handling for /x (to make -it like Perl). If a second x is present, PCRE2_EXTENDED is converted into -PCRE2_EXTENDED_MORE. A third appearance adds PCRE2_EXTENDED as well. See +The following modifiers set options for \fBpcre2_compile()\fP. There are some +single-letter abbreviations that are the same as Perl options. There is special +handling for /x: if a second x is present, PCRE2_EXTENDED is converted into +PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well, +though this makes no difference to the way \fBpcre2_compile()\fP behaves. 
See .\" HREF \fBpcre2api\fP .\" @@ -547,7 +548,7 @@ for a description of the effects of these options. never_backslash_c set PCRE2_NEVER_BACKSLASH_C never_ucp set PCRE2_NEVER_UCP never_utf set PCRE2_NEVER_UTF - no_auto_capture set PCRE2_NO_AUTO_CAPTURE + /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE no_auto_possess set PCRE2_NO_AUTO_POSSESS no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR no_start_optimize set PCRE2_NO_START_OPTIMIZE @@ -570,7 +571,8 @@ being passed to library functions. .rs .sp The following modifiers affect the compilation process or request information -about the pattern: +about the pattern. There are single-letter abbreviations for some that are +heavily used in the test files. .sp bsr=[anycrlf|unicode] specify \eR handling /B bincode show binary code without lengths @@ -1786,6 +1788,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 17 April 2017 +Last updated: 18 April 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index aa682d1..2ae5306 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2233,11 +2233,11 @@ typedef struct nest_save { #define NSF_RESET 0x0001u #define NSF_CONDASSERT 0x0002u -/* These options (changeable within the pattern) are tracked during parsing. -The rest are put into META_OPTIONS items and used when compiling. */ +/* Of the options that are changeable within the pattern, these are tracked +during parsing. The rest are used from META_OPTIONS items when compiling. */ #define PARSE_TRACKED_OPTIONS \ - (PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_DUPNAMES) + (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE) /* States used for analyzing ranges in character classes. The two OK values must be last. */ @@ -3422,9 +3422,7 @@ while (ptr < ptrend) ptr++; } - /* Scan for options imsxJU. Some of them are tracked during parsing (see - PARSE_TRACKED_OPTIONS) as they are local to groups. Others are not needed - till compile time. 
*/ + /* Scan for options imnsxJU to be set or unset. */ else { @@ -3447,6 +3445,7 @@ while (ptr < ptrend) case CHAR_i: *optset |= PCRE2_CASELESS; break; case CHAR_m: *optset |= PCRE2_MULTILINE; break; + case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break; case CHAR_s: *optset |= PCRE2_DOTALL; break; case CHAR_U: *optset |= PCRE2_UNGREEDY; break; diff --git a/src/pcre2test.c b/src/pcre2test.c index e41bfff..b12dfd1 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -720,13 +720,14 @@ typedef struct c1modstruct { } c1modstruct; static c1modstruct c1modlist[] = { - { "bincode", 'B', -1 }, - { "info", 'I', -1 }, - { "global", 'g', -1 }, - { "caseless", 'i', -1 }, - { "multiline", 'm', -1 }, - { "dotall", 's', -1 }, - { "extended", 'x', -1 } + { "bincode", 'B', -1 }, + { "info", 'I', -1 }, + { "global", 'g', -1 }, + { "caseless", 'i', -1 }, + { "multiline", 'm', -1 }, + { "no_auto_capture", 'n', -1 }, + { "dotall", 's', -1 }, + { "extended", 'x', -1 } }; #define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct) diff --git a/testdata/testinput2 b/testdata/testinput2 index 8afde1b..32be42e 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5259,4 +5259,6 @@ a)"xI /[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B +/(a)(?-n:(b))(c)/nB + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index dff7657..5042a45 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15945,6 +15945,24 @@ Subject length lower bound = 1 End ------------------------------------------------------------------ +/(a)(?-n:(b))(c)/nB +------------------------------------------------------------------ + Bra + Bra + a + Ket + Bra + CBra 1 + b + Ket + Ket + Bra + c + Ket + Ket + End +------------------------------------------------------------------ + # End of testinput2 Error -64: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data