From a3275d1e0fdd9991bde174e6a689328d481c7c0e Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 4 Apr 2017 17:09:33 +0000 Subject: [PATCH] Implement PCRE2_ENDANCHORED. --- ChangeLog | 2 ++ doc/pcre2_compile.3 | 3 ++- doc/pcre2_dfa_match.3 | 3 ++- doc/pcre2_match.3 | 3 ++- doc/pcre2_substitute.3 | 3 ++- doc/pcre2api.3 | 47 ++++++++++++++++++++++++++--------------- doc/pcre2test.1 | 6 ++++-- src/pcre2.h | 1 + src/pcre2.h.in | 1 + src/pcre2_compile.c | 2 +- src/pcre2_dfa_match.c | 23 ++++++++++++++------ src/pcre2_fuzzsupport.c | 10 +++++---- src/pcre2_match.c | 24 ++++++++++++++++----- src/pcre2test.c | 7 ++++-- testdata/testinput2 | 29 +++++++++++++++++++++++++ testdata/testoutput2 | 39 ++++++++++++++++++++++++++++++++++ 16 files changed, 161 insertions(+), 42 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2f4b1a0..335d8ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -111,6 +111,8 @@ only to pcre2_match(): 18. Make pcre2test detect an error return from pcre2_get_error_message(), give a message, and abandon the run (this would have detected #13 above). +19. Implemented PCRE2_ENDANCHORED. + Version 10.23 14-February-2017 ------------------------------ diff --git a/doc/pcre2_compile.3 b/doc/pcre2_compile.3 index 9d9157f..b77387e 100644 --- a/doc/pcre2_compile.3 +++ b/doc/pcre2_compile.3 @@ -1,4 +1,4 @@ -.TH PCRE2_COMPILE 3 "23 March 2017" "PCRE2 10.30" +.TH PCRE2_COMPILE 3 "04 April 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -48,6 +48,7 @@ The option bits are: PCRE2_DOLLAR_ENDONLY $ not to match newline at end PCRE2_DOTALL . 
matches anything including NL PCRE2_DUPNAMES Allow duplicate names for subpatterns + PCRE2_ENDANCHORED Pattern can match only at end of subject PCRE2_EXTENDED Ignore white space and # comments PCRE2_FIRSTLINE Force matching to be before newline PCRE2_MATCH_UNSET_BACKREF Match unset back references diff --git a/doc/pcre2_dfa_match.3 b/doc/pcre2_dfa_match.3 index 387e7e5..17ed86e 100644 --- a/doc/pcre2_dfa_match.3 +++ b/doc/pcre2_dfa_match.3 @@ -1,4 +1,4 @@ -.TH PCRE2_DFA_MATCH 3 "24 March 2017" "PCRE2 10.30" +.TH PCRE2_DFA_MATCH 3 "04 April 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -38,6 +38,7 @@ up a callout function or specify the recursion depth limit. The \fIlength\fP and \fIstartoffset\fP values are code units, not characters. The options are: .sp PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Pattern can match only at end of subject PCRE2_NOTBOL Subject is not the beginning of a line PCRE2_NOTEOL Subject is not the end of a line PCRE2_NOTEMPTY An empty string is not a valid match diff --git a/doc/pcre2_match.3 b/doc/pcre2_match.3 index f045d22..dc962b7 100644 --- a/doc/pcre2_match.3 +++ b/doc/pcre2_match.3 @@ -1,4 +1,4 @@ -.TH PCRE2_MATCH 3 "25 March 2017" "PCRE2 10.30" +.TH PCRE2_MATCH 3 "04 April 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -41,6 +41,7 @@ units, not characters. The length may be given as PCRE2_ZERO_TERMINATE for a subject that is terminated by a binary zero code unit. 
The options are: .sp PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Pattern can match only at end of subject PCRE2_NOTBOL Subject string is not the beginning of a line PCRE2_NOTEOL Subject string is not the end of a line PCRE2_NOTEMPTY An empty string is not a valid match diff --git a/doc/pcre2_substitute.3 b/doc/pcre2_substitute.3 index df506f8..17de3ec 100644 --- a/doc/pcre2_substitute.3 +++ b/doc/pcre2_substitute.3 @@ -1,4 +1,4 @@ -.TH PCRE2_SUBSTITUTE 3 "26 March 2017" "PCRE2 10.30" +.TH PCRE2_SUBSTITUTE 3 "04 April 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -52,6 +52,7 @@ The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for zero-terminated strings. The options are: .sp PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Pattern can match only at end of subject PCRE2_NOTBOL Subject is not the beginning of a line PCRE2_NOTEOL Subject is not the end of a line PCRE2_NOTEMPTY An empty string is not a valid match diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 3e3b9c5..cfafbc1 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "01 April 2017" "PCRE2 10.30" +.TH PCRE2API 3 "04 April 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1091,8 +1091,8 @@ documentation). .P For those options that can be different in different parts of the pattern, the contents of the \fIoptions\fP argument specifies their settings at the start of -compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at -the time of matching as well as at compile time. +compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK +options can be set at the time of matching as well as at compile time. 
.P Other, less frequently required compile-time parameters (for example, the newline setting) can be provided in a compile context (as described @@ -1248,6 +1248,13 @@ details of named subpatterns below; see also the \fBpcre2pattern\fP .\" documentation. +.sp + PCRE2_ENDANCHORED +.sp +If this bit is set, the end of any pattern match must be right at the end of +the string being searched (the "subject string"). This effect can also be +achieved by appropriate constructs in the pattern itself, which is the only way +to do it in Perl. .sp PCRE2_EXTENDED .sp @@ -2158,15 +2165,15 @@ the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA. .rs .sp The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be -zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL, -PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, -PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is -described below. +zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED, +PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, +PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. +Their action is described below. .P -Setting PCRE2_ANCHORED at match time is not supported by the just-in-time (JIT) -compiler. If it is set, JIT matching is disabled and the interpretive code in -\fBpcre2_match()\fP is run. Apart from PCRE2_NO_JIT (obviously), the remaining -options are supported for JIT matching. +Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by +the just-in-time (JIT) compiler. If either is set, JIT matching is disabled and +the interpretive code in \fBpcre2_match()\fP is run. Apart from PCRE2_NO_JIT +(obviously), the remaining options are supported for JIT matching. .sp PCRE2_ANCHORED .sp @@ -2175,6 +2182,12 @@ matching position. 
If a pattern was compiled with PCRE2_ANCHORED, or turned out to be anchored by virtue of its contents, it cannot be made unachored at matching time. Note that setting the option at match time disables JIT matching. +.sp + PCRE2_ENDANCHORED +.sp +If the PCRE2_ENDANCHORED option is set, any string that \fBpcre2_match()\fP +matches must be right at the end of the subject string. Note that setting the +option at match time disables JIT matching. .sp PCRE2_NOTBOL .sp @@ -3141,11 +3154,11 @@ Here is an example of a simple call to \fBpcre2_dfa_match()\fP: .rs .sp The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must -be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL, -PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, -PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and -PCRE2_DFA_RESTART. All but the last four of these are exactly the same as for -\fBpcre2_match()\fP, so their description is not repeated here. +be zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDANCHORED, +PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, +PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, +and PCRE2_DFA_RESTART. All but the last four of these are exactly the same as +for \fBpcre2_match()\fP, so their description is not repeated here. .sp PCRE2_PARTIAL_HARD PCRE2_PARTIAL_SOFT @@ -3309,6 +3322,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 01 April 2017 +Last updated: 04 April 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index ff6e53e..5af0dc5 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "25 March 2017" "PCRE 10.30" +.TH PCRE2TEST 1 "04 April 2017" "PCRE 10.30" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -531,6 +531,7 @@ for a description of their effects. 
dollar_endonly set PCRE2_DOLLAR_ENDONLY /s dotall set PCRE2_DOTALL dupnames set PCRE2_DUPNAMES + endanchored set PCRE2_ENDANCHORED /x extended set PCRE2_EXTENDED firstline set PCRE2_FIRSTLINE match_unset_backref set PCRE2_MATCH_UNSET_BACKREF @@ -1010,6 +1011,7 @@ The following modifiers set options for \fBpcre2_match()\fP or for a description of their effects. .sp anchored set PCRE2_ANCHORED + endanchored set PCRE2_ENDANCHORED dfa_restart set PCRE2_DFA_RESTART dfa_shortest set PCRE2_DFA_SHORTEST no_jit set PCRE2_NO_JIT @@ -1775,6 +1777,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 25 March 2017 +Last updated: 04 April 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/src/pcre2.h b/src/pcre2.h index f14fcb2..e699b8a 100644 --- a/src/pcre2.h +++ b/src/pcre2.h @@ -101,6 +101,7 @@ others can be added next to them */ #define PCRE2_ANCHORED 0x80000000u #define PCRE2_NO_UTF_CHECK 0x40000000u +#define PCRE2_ENDANCHORED 0x20000000u /* The following option bits can be passed only to pcre2_compile(). However, they may affect compilation, JIT compilation, and/or interpretive execution. diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 643879f..7a8eb71 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -101,6 +101,7 @@ others can be added next to them */ #define PCRE2_ANCHORED 0x80000000u #define PCRE2_NO_UTF_CHECK 0x40000000u +#define PCRE2_ENDANCHORED 0x20000000u /* The following option bits can be passed only to pcre2_compile(). However, they may affect compilation, JIT compilation, and/or interpretive execution. 
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 5762aad..0b6175b 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -695,7 +695,7 @@ static int posix_substitutes[] = { #define PUBLIC_COMPILE_OPTIONS \ (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ - PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \ PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 0a952b6..1bafb47 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -83,7 +83,7 @@ in others, so I abandoned this code. */ #include "pcre2_internal.h" #define PUBLIC_DFA_MATCH_OPTIONS \ - (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART) @@ -3070,7 +3070,7 @@ for (;;) ) ) match_count = PCRE2_ERROR_PARTIAL; - break; /* In effect, "return", but see the comment below */ + break; /* Exit from loop along the subject string */ } /* One or more states are active for the next character. */ @@ -3078,11 +3078,13 @@ for (;;) ptr += clen; /* Advance to next subject character */ } /* Loop to move along the subject string */ -/* Control gets here from "break" a few lines above. We do it this way because -if we use "return" above, we have compiler trouble. Some compilers warn if -there's nothing here because they think the function doesn't return a value. On -the other hand, if we put a dummy statement here, some more clever compilers -complain that it can't be reached. Sigh. 
*/ +/* Control gets here from "break" a few lines above. If we have a match and +PCRE2_ENDANCHORED is set, the match fails. */ + +if (match_count >= 0 && + ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 && + ptr < end_subject) + match_count = PCRE2_ERROR_NOMATCH; return match_count; } @@ -3155,6 +3157,13 @@ if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (start_offset > length) return PCRE2_ERROR_BADOFFSET; +/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same +time. */ + +if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 && + ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) + return PCRE2_ERROR_BADOPTION; + /* Check that the first field in the block is the magic number. If it is not, return with PCRE2_ERROR_BADMAGIC. */ diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index 580475a..a2621cd 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -22,7 +22,7 @@ Written by Philip Hazel, October 2016 #define ALLOWED_COMPILE_OPTIONS \ (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ - PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ PCRE2_NO_AUTO_CAPTURE| \ PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ @@ -30,7 +30,7 @@ Written by Philip Hazel, October 2016 PCRE2_UTF) #define ALLOWED_MATCH_OPTIONS \ - (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \ PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT) @@ -103,7 +103,7 @@ for (i = 0; i < 2; i++) #ifdef STANDALONE printf("Compile options %.8x 
never_backslash_c", compile_options); - printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "", ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "", ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "", @@ -114,6 +114,7 @@ for (i = 0; i < 2; i++) ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "", ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "", ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "", + ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "", ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "", ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "", @@ -178,8 +179,9 @@ for (i = 0; i < 2; i++) { #ifdef STANDALONE printf("Match options %.8x", match_options); - printf("%s%s%s%s%s%s%s%s\n", + printf("%s%s%s%s%s%s%s%s%s\n", ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "", ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "", diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 69110dc..c5eede4 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -76,7 +76,7 @@ the heap is obtained for patterns that need more frames. */ /* Masks for identifying the public options that are permitted at match time. 
*/ #define PUBLIC_MATCH_OPTIONS \ - (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT) @@ -791,10 +791,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode); continue; } - /* Not a recursion. Fail if either PCRE2_NOTEMPTY is set, or if - PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the - subject. In both cases, backtracking will then try other alternatives, if - any. */ + /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY + is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the + start of the subject. In both cases, backtracking will then try other + alternatives, if any. */ if (Feptr == Fstart_match && ((mb->moptions & PCRE2_NOTEMPTY) != 0 || @@ -802,6 +802,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode); Fstart_match == mb->start_subject + mb->start_offset))) RRETURN(MATCH_NOMATCH); + /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not + the end of the subject. */ + + if (Feptr < mb->end_subject && + ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0) + RRETURN(MATCH_NOMATCH); + /* We have a successful match of the whole pattern. Record the result and then do a direct return from the function. If there is space in the offset vector, set any pairs that follow the highest-numbered captured string but @@ -6086,6 +6093,13 @@ set up later. */ utf = (re->overall_options & PCRE2_UTF) != 0; mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 : ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0; + +/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same +time. */ + +if (mb->partial != 0 && + ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) + return PCRE2_ERROR_BADOPTION; /* Check a UTF string for validity if required. 
For 8-bit and 16-bit strings, we must also check that a starting offset does not point into the middle of a diff --git a/src/pcre2test.c b/src/pcre2test.c index 2f47daa..2906eac 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -578,6 +578,7 @@ static modstruct modlist[] = { { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, + { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) }, { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) }, { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) }, { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) }, @@ -3823,7 +3824,7 @@ static void show_compile_options(uint32_t options, const char *before, const char *after) { if (options == 0) fprintf(outfile, "%s %s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", @@ -3835,6 +3836,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s" ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", ((options & PCRE2_DOTALL) != 0)? " dotall" : "", ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "", + ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", ((options & PCRE2_EXTENDED) != 0)? " extended" : "", ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "", ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "", @@ -3866,10 +3868,11 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s" static void show_match_options(uint32_t options) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s", +fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s", ((options & PCRE2_ANCHORED) != 0)? 
" anchored" : "", ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "", ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "", + ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "", ((options & PCRE2_NOTBOL) != 0)? " notbol" : "", ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "", diff --git a/testdata/testinput2 b/testdata/testinput2 index 9b7788b..cd1cbfa 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5011,4 +5011,33 @@ a)"xI /(a(*MARK:m)(*ACCEPT)){0}(?1)/mark abc +# Temporary, until JIT is updated. +#subject no_jit + +/abc/endanchored + xyzabc +\= Expect no match + xyzabcdef +\= Expect error + xyzabc\=ph + +/abc/ + xyzabc\=endanchored +\= Expect no match + xyzabcdef\=endanchored +\= Expect error + xyzabc\=ps,endanchored + +/abc(*ACCEPT)d/endanchored + xyzabc +\= Expect no match + xyzabcdef + +/abc|bcd/endanchored + xyzabcd +\= Expect no match + xyzabcdef + +#subject -no_jit + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 5a6c94f..5251a0e 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15537,6 +15537,45 @@ No match 0: a MK: m +# Temporary, until JIT is updated. +#subject no_jit + +/abc/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match +\= Expect error + xyzabc\=ph +Failed: error -34: bad option value + +/abc/ + xyzabc\=endanchored + 0: abc +\= Expect no match + xyzabcdef\=endanchored +No match +\= Expect error + xyzabc\=ps,endanchored +Failed: error -34: bad option value + +/abc(*ACCEPT)d/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match + +/abc|bcd/endanchored + xyzabcd + 0: bcd +\= Expect no match + xyzabcdef +No match + +#subject -no_jit + # End of testinput2 Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data