Add (?* and (?<* synonyms for non-atomic lookarounds.
This commit is contained in:
parent
d170829b26
commit
ac4ab7186d
|
@ -28,6 +28,10 @@ now correctly backtracked, so this unnecessary restriction has been removed.
|
|||
|
||||
7. Added PCRE2_SUBSTITUTE_MATCHED.
|
||||
|
||||
8. Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another
|
||||
regex engine. The Perl regex folks are aware of this usage and have made a note
|
||||
about it.
|
||||
|
||||
|
||||
Version 10.34 21-November-2019
|
||||
------------------------------
|
||||
|
|
|
@ -2624,8 +2624,8 @@ backtracking into the assertion. However, there are some cases where non-atomic
|
|||
positive assertions can be useful. PCRE2 provides these using the following
|
||||
syntax:
|
||||
<pre>
|
||||
(*non_atomic_positive_lookahead: or (*napla:
|
||||
(*non_atomic_positive_lookbehind: or (*naplb:
|
||||
(*non_atomic_positive_lookahead: or (*napla: or (?*
|
||||
(*non_atomic_positive_lookbehind: or (*naplb: or (?<*
|
||||
</pre>
|
||||
Consider the problem of finding the right-most word in a string that also
|
||||
appears earlier in the string, that is, it must appear at least twice in total.
|
||||
|
@ -3833,7 +3833,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 18 December 2019
|
||||
Last updated: 28 December 2019
|
||||
<br>
|
||||
Copyright © 1997-2019 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -553,11 +553,13 @@ Each top-level branch of a lookbehind must be of a fixed length.
|
|||
<P>
|
||||
These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
<pre>
|
||||
(*napla:...)
|
||||
(*non_atomic_positive_lookahead:...)
|
||||
(?*...) )
|
||||
(*napla:...) ) synonyms
|
||||
(*non_atomic_positive_lookahead:...) )
|
||||
|
||||
(*naplb:...)
|
||||
(*non_atomic_positive_lookbehind:...)
|
||||
(?<*...) )
|
||||
(*naplb:...) ) synonyms
|
||||
(*non_atomic_positive_lookbehind:...) )
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">SCRIPT RUNS</a><br>
|
||||
|
@ -683,7 +685,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC29" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 29 July 2019
|
||||
Last updated: 28 December 2019
|
||||
<br>
|
||||
Copyright © 1997-2019 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -8354,8 +8354,8 @@ NON-ATOMIC ASSERTIONS
|
|||
some cases where non-atomic positive assertions can be useful. PCRE2
|
||||
provides these using the following syntax:
|
||||
|
||||
(*non_atomic_positive_lookahead: or (*napla:
|
||||
(*non_atomic_positive_lookbehind: or (*naplb:
|
||||
(*non_atomic_positive_lookahead: or (*napla: or (?*
|
||||
(*non_atomic_positive_lookbehind: or (*naplb: or (?<*
|
||||
|
||||
Consider the problem of finding the right-most word in a string that
|
||||
also appears earlier in the string, that is, it must appear at least
|
||||
|
@ -9487,7 +9487,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 18 December 2019
|
||||
Last updated: 28 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -10716,11 +10716,13 @@ NON-ATOMIC LOOKAROUND ASSERTIONS
|
|||
|
||||
These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
|
||||
(*napla:...)
|
||||
(*non_atomic_positive_lookahead:...)
|
||||
(?*...) )
|
||||
(*napla:...) ) synonyms
|
||||
(*non_atomic_positive_lookahead:...) )
|
||||
|
||||
(*naplb:...)
|
||||
(*non_atomic_positive_lookbehind:...)
|
||||
(?<*...) )
|
||||
(*naplb:...) ) synonyms
|
||||
(*non_atomic_positive_lookbehind:...) )
|
||||
|
||||
|
||||
SCRIPT RUNS
|
||||
|
@ -10844,7 +10846,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 29 July 2019
|
||||
Last updated: 28 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2PATTERN 3 "18 December 2019" "PCRE2 10.35"
|
||||
.TH PCRE2PATTERN 3 "28 December 2019" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||
|
@ -2637,8 +2637,8 @@ backtracking into the assertion. However, there are some cases where non-atomic
|
|||
positive assertions can be useful. PCRE2 provides these using the following
|
||||
syntax:
|
||||
.sp
|
||||
(*non_atomic_positive_lookahead: or (*napla:
|
||||
(*non_atomic_positive_lookbehind: or (*naplb:
|
||||
(*non_atomic_positive_lookahead: or (*napla: or (?*
|
||||
(*non_atomic_positive_lookbehind: or (*naplb: or (?<*
|
||||
.sp
|
||||
Consider the problem of finding the right-most word in a string that also
|
||||
appears earlier in the string, that is, it must appear at least twice in total.
|
||||
|
@ -3874,6 +3874,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 18 December 2019
|
||||
Last updated: 28 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2SYNTAX 3 "29 July 2019" "PCRE2 10.34"
|
||||
.TH PCRE2SYNTAX 3 "28 December 2019" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
|
||||
|
@ -531,11 +531,13 @@ Each top-level branch of a lookbehind must be of a fixed length.
|
|||
.sp
|
||||
These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
.sp
|
||||
(*napla:...)
|
||||
(*non_atomic_positive_lookahead:...)
|
||||
(?*...) )
|
||||
(*napla:...) ) synonyms
|
||||
(*non_atomic_positive_lookahead:...) )
|
||||
.sp
|
||||
(*naplb:...)
|
||||
(*non_atomic_positive_lookbehind:...)
|
||||
(?<*...) )
|
||||
(*naplb:...) ) synonyms
|
||||
(*non_atomic_positive_lookbehind:...) )
|
||||
.
|
||||
.
|
||||
.SH "SCRIPT RUNS"
|
||||
|
@ -670,6 +672,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 29 July 2019
|
||||
Last updated: 28 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -3653,7 +3653,7 @@ while (ptr < ptrend)
|
|||
if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
|
||||
|
||||
/* If ( is not followed by ? it is either a capture or a special verb or an
|
||||
alpha assertion. */
|
||||
alpha assertion or a positive non-atomic lookahead. */
|
||||
|
||||
if (*ptr != CHAR_QUESTION_MARK)
|
||||
{
|
||||
|
@ -3685,10 +3685,10 @@ while (ptr < ptrend)
|
|||
break;
|
||||
|
||||
/* Handle "alpha assertions" such as (*pla:...). Most of these are
|
||||
synonyms for the historical symbolic assertions, but the script run ones
|
||||
are new. They are distinguished by starting with a lower case letter.
|
||||
Checking both ends of the alphabet makes this work in all character
|
||||
codes. */
|
||||
synonyms for the historical symbolic assertions, but the script run and
|
||||
non-atomic lookaround ones are new. They are distinguished by starting
|
||||
with a lower case letter. Checking both ends of the alphabet makes this
|
||||
work in all character codes. */
|
||||
|
||||
else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0)
|
||||
{
|
||||
|
@ -3747,9 +3747,7 @@ while (ptr < ptrend)
|
|||
goto POSITIVE_LOOK_AHEAD;
|
||||
|
||||
case META_LOOKAHEAD_NA:
|
||||
*parsed_pattern++ = meta;
|
||||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
goto POSITIVE_NONATOMIC_LOOK_AHEAD;
|
||||
|
||||
case META_LOOKAHEADNOT:
|
||||
goto NEGATIVE_LOOK_AHEAD;
|
||||
|
@ -4438,6 +4436,12 @@ while (ptr < ptrend)
|
|||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
|
||||
case CHAR_ASTERISK:
|
||||
POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (?* */
|
||||
*parsed_pattern++ = META_LOOKAHEAD_NA;
|
||||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
|
||||
case CHAR_EXCLAMATION_MARK:
|
||||
NEGATIVE_LOOK_AHEAD: /* Come from (*nla: */
|
||||
*parsed_pattern++ = META_LOOKAHEADNOT;
|
||||
|
@ -4447,20 +4451,23 @@ while (ptr < ptrend)
|
|||
|
||||
/* ---- Lookbehind assertions ---- */
|
||||
|
||||
/* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the
|
||||
start of the name of a capturing group. */
|
||||
/* (?< followed by = or ! or * is a lookbehind assertion. Otherwise (?<
|
||||
is the start of the name of a capturing group. */
|
||||
|
||||
case CHAR_LESS_THAN_SIGN:
|
||||
if (ptrend - ptr <= 1 ||
|
||||
(ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK))
|
||||
(ptr[1] != CHAR_EQUALS_SIGN &&
|
||||
ptr[1] != CHAR_EXCLAMATION_MARK &&
|
||||
ptr[1] != CHAR_ASTERISK))
|
||||
{
|
||||
terminator = CHAR_GREATER_THAN_SIGN;
|
||||
goto DEFINE_NAME;
|
||||
}
|
||||
*parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
|
||||
META_LOOKBEHIND : META_LOOKBEHINDNOT;
|
||||
META_LOOKBEHIND : (ptr[1] == CHAR_EXCLAMATION_MARK)?
|
||||
META_LOOKBEHINDNOT : META_LOOKBEHIND_NA;
|
||||
|
||||
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
|
||||
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
|
||||
*has_lookbehind = TRUE;
|
||||
offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
|
||||
PUTOFFSET(offset, parsed_pattern);
|
||||
|
@ -4633,8 +4640,6 @@ while (ptr < ptrend)
|
|||
*parsed_pattern++ = META_KET;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
|
||||
else top_nest--;
|
||||
}
|
||||
|
|
|
@ -5670,6 +5670,9 @@ a)"xI
|
|||
/\A(*napla:.*\b(\w++))(?>.*?\b\1\b){3}/
|
||||
word1 word3 word1 word2 word3 word2 word2 word1 word3 word4
|
||||
|
||||
/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/
|
||||
word1 word3 word1 word2 word3 word2 word2 word1 word3 word4
|
||||
|
||||
/(*plb:(.)..|(.)...)(\1|\2)/
|
||||
abcdb\=offset=4
|
||||
abcda\=offset=4
|
||||
|
@ -5678,6 +5681,10 @@ a)"xI
|
|||
abcdb\=offset=4
|
||||
abcda\=offset=4
|
||||
|
||||
/(?<*(.)..|(.)...)(\1|\2)/
|
||||
abcdb\=offset=4
|
||||
abcda\=offset=4
|
||||
|
||||
/(*non_atomic_positive_lookahead:ab)/B
|
||||
|
||||
/(*non_atomic_positive_lookbehind:ab)/B
|
||||
|
|
|
@ -17088,6 +17088,11 @@ No match
|
|||
0: word1 word3 word1 word2 word3 word2 word2 word1 word3
|
||||
1: word3
|
||||
|
||||
/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/
|
||||
word1 word3 word1 word2 word3 word2 word2 word1 word3 word4
|
||||
0: word1 word3 word1 word2 word3 word2 word2 word1 word3
|
||||
1: word3
|
||||
|
||||
/(*plb:(.)..|(.)...)(\1|\2)/
|
||||
abcdb\=offset=4
|
||||
0: b
|
||||
|
@ -17109,6 +17114,18 @@ No match
|
|||
2: a
|
||||
3: a
|
||||
|
||||
/(?<*(.)..|(.)...)(\1|\2)/
|
||||
abcdb\=offset=4
|
||||
0: b
|
||||
1: b
|
||||
2: <unset>
|
||||
3: b
|
||||
abcda\=offset=4
|
||||
0: a
|
||||
1: <unset>
|
||||
2: a
|
||||
3: a
|
||||
|
||||
/(*non_atomic_positive_lookahead:ab)/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
|
|
Loading…
Reference in New Issue