Fix conditional group backreference bug.
This commit is contained in:
parent
c702fd65c8
commit
d4daaf966d
18
ChangeLog
18
ChangeLog
|
@ -1,7 +1,23 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
Version 10.10 13-January-2015
|
||||
-----------------------------
|
||||
|
||||
1. When a pattern is compiled, it remembers the highest back reference so that
|
||||
when matching, if the ovector is too small, extra memory can be obtained to
|
||||
use instead. A conditional subpattern whose condition is a check on a capture
|
||||
having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is
|
||||
another kind of back reference, but it was not setting the highest
|
||||
backreference number. This mattered only if pcre2_match() was called with an
|
||||
ovector that was too small to hold the capture, and there was no other kind of
|
||||
back reference (a situation which is probably quite rare). The effect of the
|
||||
bug was that the condition was always treated as FALSE when the capture could
|
||||
not be consulted, leading to incorrect behaviour by pcre2_match(). This bug
|
||||
has been fixed.
|
||||
|
||||
|
||||
Version 10.00 05-January-2015
|
||||
-----------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
|
|
|
@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
|||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [00])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2014-01-05])
|
||||
m4_define(pcre2_minor, [10])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2014-01-13])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "02 January 2015" "PCRE2 10.00"
|
||||
.TH PCRE2API 3 "13 January 2015" "PCRE2 10.10"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1474,8 +1474,12 @@ options returned for PCRE2_INFO_ALLOPTIONS.
|
|||
PCRE2_INFO_BACKREFMAX
|
||||
.sp
|
||||
Return the number of the highest back reference in the pattern. The third
|
||||
argument should point to an \fBuint32_t\fP variable. Zero is returned if there
|
||||
are no back references.
|
||||
argument should point to an \fBuint32_t\fP variable. Named subpatterns acquire
|
||||
numbers as well as names, and these count towards the highest back reference.
|
||||
Back references such as \e4 or \eg{12} match the captured characters of the
|
||||
given group, but in addition, the check that a capturing group is set in a
|
||||
conditional subpattern such as (?(3)a|b) is also a back reference. Zero is
|
||||
returned if there are no back references.
|
||||
.sp
|
||||
PCRE2_INFO_BSR
|
||||
.sp
|
||||
|
@ -2849,6 +2853,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 02 January 2015
|
||||
Last updated: 13 January 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -5336,6 +5336,7 @@ for (;; ptr++)
|
|||
goto FAILED;
|
||||
}
|
||||
PUT2(code, 2+LINK_SIZE, recno);
|
||||
if (recno > cb->top_backref) cb->top_backref = recno;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -5355,15 +5356,18 @@ for (;; ptr++)
|
|||
|
||||
if (i < cb->names_found)
|
||||
{
|
||||
int offset = i++;
|
||||
int count = 1;
|
||||
recno = GET2(slot, 0); /* Number from first found */
|
||||
for (; i < cb->names_found; i++)
|
||||
int offset = i; /* Offset of first name found */
|
||||
int count = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
recno = GET2(slot, 0); /* Number for last found */
|
||||
if (recno > cb->top_backref) cb->top_backref = recno;
|
||||
count++;
|
||||
if (++i >= cb->names_found) break;
|
||||
slot += cb->name_entry_size;
|
||||
if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 ||
|
||||
(slot+IMM2_SIZE)[namelen] != 0) break;
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count > 1)
|
||||
|
|
|
@ -4116,4 +4116,22 @@ a random value. /Ix
|
|||
|
||||
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
|
||||
|
||||
'^(?:(a)|b)(?(1)A|B)'
|
||||
aA123\=ovector=1
|
||||
aA123\=ovector=2
|
||||
|
||||
'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
|
||||
aA123\=ovector=1
|
||||
aA123\=ovector=2
|
||||
|
||||
'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames
|
||||
aA123\=ovector=1
|
||||
aA123\=ovector=2
|
||||
aA123\=ovector=3
|
||||
|
||||
'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames
|
||||
aa123\=ovector=1
|
||||
aa123\=ovector=2
|
||||
aa123\=ovector=3
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -1535,28 +1535,33 @@ Subject length lower bound = 3
|
|||
|
||||
/a(?(1)b)(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/a(?(1)bag|big)(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'a'
|
||||
Last code unit = 'g'
|
||||
Subject length lower bound = 5
|
||||
|
||||
/a(?(1)bag|big)*(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/a(?(1)bag|big)+(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'a'
|
||||
Last code unit = 'g'
|
||||
Subject length lower bound = 5
|
||||
|
||||
/a(?(1)b..|b..)(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'a'
|
||||
Last code unit = 'b'
|
||||
Subject length lower bound = 5
|
||||
|
@ -3345,21 +3350,25 @@ Subject length lower bound = 1
|
|||
|
||||
/(?(1)ab|ac)(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
|
||||
/(?(1)abz|acz)(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'a'
|
||||
Last code unit = 'z'
|
||||
Subject length lower bound = 4
|
||||
|
||||
/(?(1)abz)(.)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Subject length lower bound = 1
|
||||
|
||||
/(?(1)abz)(1)23/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Last code unit = '3'
|
||||
Subject length lower bound = 3
|
||||
|
||||
|
@ -5923,6 +5932,7 @@ Subject length lower bound = 6
|
|||
|
||||
/^(?P<A>a)?(?(A)a|b)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
Compile options: <none>
|
||||
|
@ -5940,6 +5950,7 @@ No match
|
|||
|
||||
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Named capturing subpatterns:
|
||||
ZZ 1
|
||||
Last code unit = 'X'
|
||||
|
@ -5956,6 +5967,7 @@ Failed: error 115 at offset 9: reference to non-existent subpattern
|
|||
|
||||
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Named capturing subpatterns:
|
||||
ZZ 1
|
||||
Last code unit = 'X'
|
||||
|
@ -5966,6 +5978,7 @@ Subject length lower bound = 3
|
|||
|
||||
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Named capturing subpatterns:
|
||||
ZZ 1
|
||||
Last code unit = 'X'
|
||||
|
@ -9713,6 +9726,7 @@ Subject length lower bound = 9
|
|||
(?(1)|.) # check that there was an empty component
|
||||
/Iix
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Compile options: caseless extended
|
||||
Overall options: anchored caseless extended
|
||||
Last code unit = ':'
|
||||
|
@ -9740,6 +9754,7 @@ Failed: error 165 at offset 15: different names for subpatterns of the same numb
|
|||
b(?<quote> (?<apostrophe>')|(?<realquote>")) )
|
||||
(?('quote')[a-z]+|[0-9]+)/Ix,dupnames
|
||||
Capturing subpattern count = 6
|
||||
Max back reference = 4
|
||||
Named capturing subpatterns:
|
||||
apostrophe 2
|
||||
apostrophe 5
|
||||
|
@ -9802,6 +9817,7 @@ No match
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 4
|
||||
Max back reference = 4
|
||||
Named capturing subpatterns:
|
||||
D 4
|
||||
D 1
|
||||
|
@ -9849,6 +9865,7 @@ No match
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 4
|
||||
Max back reference = 4
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
A 4
|
||||
|
@ -9964,6 +9981,7 @@ Subject length lower bound = 1
|
|||
|
||||
/()i(?(1)a)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
First code unit = 'i'
|
||||
Subject length lower bound = 1
|
||||
ia
|
||||
|
@ -13540,6 +13558,7 @@ No match
|
|||
|
||||
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Named capturing subpatterns:
|
||||
VERSION 1
|
||||
Starting code units: a x
|
||||
|
@ -13832,4 +13851,46 @@ Compile options: <none>
|
|||
Overall options: dotall no_dotstar_anchor
|
||||
Subject length lower bound = 1
|
||||
|
||||
'^(?:(a)|b)(?(1)A|B)'
|
||||
aA123\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: aA
|
||||
aA123\=ovector=2
|
||||
0: aA
|
||||
1: a
|
||||
|
||||
'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
|
||||
aA123\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: aA
|
||||
aA123\=ovector=2
|
||||
0: aA
|
||||
1: a
|
||||
|
||||
'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames
|
||||
aA123\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: aA
|
||||
aA123\=ovector=2
|
||||
Matched, but too many substrings
|
||||
0: aA
|
||||
1:
|
||||
aA123\=ovector=3
|
||||
0: aA
|
||||
1:
|
||||
2: a
|
||||
|
||||
'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames
|
||||
aa123\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: aa
|
||||
aa123\=ovector=2
|
||||
Matched, but too many substrings
|
||||
0: aa
|
||||
1: <unset>
|
||||
aa123\=ovector=3
|
||||
0: aa
|
||||
1: <unset>
|
||||
2: a
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue