Fix conditional group backreference bug.
This commit is contained in:
parent
c702fd65c8
commit
d4daaf966d
16
ChangeLog
16
ChangeLog
|
@ -1,6 +1,22 @@
|
||||||
Change Log for PCRE2
|
Change Log for PCRE2
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
Version 10.10 13-January-2015
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
1. When a pattern is compiled, it remembers the highest back reference so that
|
||||||
|
when matching, if the ovector is too small, extra memory can be obtained to
|
||||||
|
use instead. A conditional subpattern whose condition is a check on a capture
|
||||||
|
having happened, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is
|
||||||
|
another kind of back reference, but it was not setting the highest
|
||||||
|
backreference number. This mattered only if pcre2_match() was called with an
|
||||||
|
ovector that was too small to hold the capture, and there was no other kind of
|
||||||
|
back reference (a situation which is probably quite rare). The effect of the
|
||||||
|
bug was that the condition was always treated as FALSE when the capture could
|
||||||
|
not be consulted, leading to incorrect behaviour by pcre2_match(). This bug
|
||||||
|
has been fixed.
|
||||||
|
|
||||||
|
|
||||||
Version 10.00 05-January-2015
|
Version 10.00 05-January-2015
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
|
|
|
@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
||||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
|
|
||||||
m4_define(pcre2_major, [10])
|
m4_define(pcre2_major, [10])
|
||||||
m4_define(pcre2_minor, [00])
|
m4_define(pcre2_minor, [10])
|
||||||
m4_define(pcre2_prerelease, [])
|
m4_define(pcre2_prerelease, [-RC1])
|
||||||
m4_define(pcre2_date, [2014-01-05])
|
m4_define(pcre2_date, [2014-01-13])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "02 January 2015" "PCRE2 10.00"
|
.TH PCRE2API 3 "13 January 2015" "PCRE2 10.10"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -1474,8 +1474,12 @@ options returned for PCRE2_INFO_ALLOPTIONS.
|
||||||
PCRE2_INFO_BACKREFMAX
|
PCRE2_INFO_BACKREFMAX
|
||||||
.sp
|
.sp
|
||||||
Return the number of the highest back reference in the pattern. The third
|
Return the number of the highest back reference in the pattern. The third
|
||||||
argument should point to an \fBuint32_t\fP variable. Zero is returned if there
|
argument should point to an \fBuint32_t\fP variable. Named subpatterns acquire
|
||||||
are no back references.
|
numbers as well as names, and these count towards the highest back reference.
|
||||||
|
Back references such as \e4 or \eg{12} match the captured characters of the
|
||||||
|
given group, but in addition, the check that a capturing group is set in a
|
||||||
|
conditional subpattern such as (?(3)a|b) is also a back reference. Zero is
|
||||||
|
returned if there are no back references.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_INFO_BSR
|
PCRE2_INFO_BSR
|
||||||
.sp
|
.sp
|
||||||
|
@ -2849,6 +2853,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 02 January 2015
|
Last updated: 13 January 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -5336,6 +5336,7 @@ for (;; ptr++)
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
PUT2(code, 2+LINK_SIZE, recno);
|
PUT2(code, 2+LINK_SIZE, recno);
|
||||||
|
if (recno > cb->top_backref) cb->top_backref = recno;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5355,15 +5356,18 @@ for (;; ptr++)
|
||||||
|
|
||||||
if (i < cb->names_found)
|
if (i < cb->names_found)
|
||||||
{
|
{
|
||||||
int offset = i++;
|
int offset = i; /* Offset of first name found */
|
||||||
int count = 1;
|
int count = 0;
|
||||||
recno = GET2(slot, 0); /* Number from first found */
|
|
||||||
for (; i < cb->names_found; i++)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
recno = GET2(slot, 0); /* Number for last found */
|
||||||
|
if (recno > cb->top_backref) cb->top_backref = recno;
|
||||||
|
count++;
|
||||||
|
if (++i >= cb->names_found) break;
|
||||||
slot += cb->name_entry_size;
|
slot += cb->name_entry_size;
|
||||||
if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 ||
|
if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 ||
|
||||||
(slot+IMM2_SIZE)[namelen] != 0) break;
|
(slot+IMM2_SIZE)[namelen] != 0) break;
|
||||||
count++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (count > 1)
|
if (count > 1)
|
||||||
|
|
|
@ -4116,4 +4116,22 @@ a random value. /Ix
|
||||||
|
|
||||||
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
|
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
|
||||||
|
|
||||||
|
'^(?:(a)|b)(?(1)A|B)'
|
||||||
|
aA123\=ovector=1
|
||||||
|
aA123\=ovector=2
|
||||||
|
|
||||||
|
'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
|
||||||
|
aA123\=ovector=1
|
||||||
|
aA123\=ovector=2
|
||||||
|
|
||||||
|
'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames
|
||||||
|
aA123\=ovector=1
|
||||||
|
aA123\=ovector=2
|
||||||
|
aA123\=ovector=3
|
||||||
|
|
||||||
|
'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames
|
||||||
|
aa123\=ovector=1
|
||||||
|
aa123\=ovector=2
|
||||||
|
aa123\=ovector=3
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -1535,28 +1535,33 @@ Subject length lower bound = 3
|
||||||
|
|
||||||
/a(?(1)b)(.)/I
|
/a(?(1)b)(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
|
|
||||||
/a(?(1)bag|big)(.)/I
|
/a(?(1)bag|big)(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Last code unit = 'g'
|
Last code unit = 'g'
|
||||||
Subject length lower bound = 5
|
Subject length lower bound = 5
|
||||||
|
|
||||||
/a(?(1)bag|big)*(.)/I
|
/a(?(1)bag|big)*(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
|
|
||||||
/a(?(1)bag|big)+(.)/I
|
/a(?(1)bag|big)+(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Last code unit = 'g'
|
Last code unit = 'g'
|
||||||
Subject length lower bound = 5
|
Subject length lower bound = 5
|
||||||
|
|
||||||
/a(?(1)b..|b..)(.)/I
|
/a(?(1)b..|b..)(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Last code unit = 'b'
|
Last code unit = 'b'
|
||||||
Subject length lower bound = 5
|
Subject length lower bound = 5
|
||||||
|
@ -3345,21 +3350,25 @@ Subject length lower bound = 1
|
||||||
|
|
||||||
/(?(1)ab|ac)(.)/I
|
/(?(1)ab|ac)(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
|
|
||||||
/(?(1)abz|acz)(.)/I
|
/(?(1)abz|acz)(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Last code unit = 'z'
|
Last code unit = 'z'
|
||||||
Subject length lower bound = 4
|
Subject length lower bound = 4
|
||||||
|
|
||||||
/(?(1)abz)(.)/I
|
/(?(1)abz)(.)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
/(?(1)abz)(1)23/I
|
/(?(1)abz)(1)23/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Last code unit = '3'
|
Last code unit = '3'
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
@ -5923,6 +5932,7 @@ Subject length lower bound = 6
|
||||||
|
|
||||||
/^(?P<A>a)?(?(A)a|b)/I
|
/^(?P<A>a)?(?(A)a|b)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
A 1
|
A 1
|
||||||
Compile options: <none>
|
Compile options: <none>
|
||||||
|
@ -5940,6 +5950,7 @@ No match
|
||||||
|
|
||||||
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/I
|
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
ZZ 1
|
ZZ 1
|
||||||
Last code unit = 'X'
|
Last code unit = 'X'
|
||||||
|
@ -5956,6 +5967,7 @@ Failed: error 115 at offset 9: reference to non-existent subpattern
|
||||||
|
|
||||||
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I
|
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
ZZ 1
|
ZZ 1
|
||||||
Last code unit = 'X'
|
Last code unit = 'X'
|
||||||
|
@ -5966,6 +5978,7 @@ Subject length lower bound = 3
|
||||||
|
|
||||||
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/I
|
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
ZZ 1
|
ZZ 1
|
||||||
Last code unit = 'X'
|
Last code unit = 'X'
|
||||||
|
@ -9713,6 +9726,7 @@ Subject length lower bound = 9
|
||||||
(?(1)|.) # check that there was an empty component
|
(?(1)|.) # check that there was an empty component
|
||||||
/Iix
|
/Iix
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Compile options: caseless extended
|
Compile options: caseless extended
|
||||||
Overall options: anchored caseless extended
|
Overall options: anchored caseless extended
|
||||||
Last code unit = ':'
|
Last code unit = ':'
|
||||||
|
@ -9740,6 +9754,7 @@ Failed: error 165 at offset 15: different names for subpatterns of the same numb
|
||||||
b(?<quote> (?<apostrophe>')|(?<realquote>")) )
|
b(?<quote> (?<apostrophe>')|(?<realquote>")) )
|
||||||
(?('quote')[a-z]+|[0-9]+)/Ix,dupnames
|
(?('quote')[a-z]+|[0-9]+)/Ix,dupnames
|
||||||
Capturing subpattern count = 6
|
Capturing subpattern count = 6
|
||||||
|
Max back reference = 4
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
apostrophe 2
|
apostrophe 2
|
||||||
apostrophe 5
|
apostrophe 5
|
||||||
|
@ -9802,6 +9817,7 @@ No match
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Capturing subpattern count = 4
|
Capturing subpattern count = 4
|
||||||
|
Max back reference = 4
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
D 4
|
D 4
|
||||||
D 1
|
D 1
|
||||||
|
@ -9849,6 +9865,7 @@ No match
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Capturing subpattern count = 4
|
Capturing subpattern count = 4
|
||||||
|
Max back reference = 4
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
A 1
|
A 1
|
||||||
A 4
|
A 4
|
||||||
|
@ -9964,6 +9981,7 @@ Subject length lower bound = 1
|
||||||
|
|
||||||
/()i(?(1)a)/I
|
/()i(?(1)a)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
First code unit = 'i'
|
First code unit = 'i'
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
ia
|
ia
|
||||||
|
@ -13540,6 +13558,7 @@ No match
|
||||||
|
|
||||||
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
|
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
|
Max back reference = 1
|
||||||
Named capturing subpatterns:
|
Named capturing subpatterns:
|
||||||
VERSION 1
|
VERSION 1
|
||||||
Starting code units: a x
|
Starting code units: a x
|
||||||
|
@ -13832,4 +13851,46 @@ Compile options: <none>
|
||||||
Overall options: dotall no_dotstar_anchor
|
Overall options: dotall no_dotstar_anchor
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
'^(?:(a)|b)(?(1)A|B)'
|
||||||
|
aA123\=ovector=1
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: aA
|
||||||
|
aA123\=ovector=2
|
||||||
|
0: aA
|
||||||
|
1: a
|
||||||
|
|
||||||
|
'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
|
||||||
|
aA123\=ovector=1
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: aA
|
||||||
|
aA123\=ovector=2
|
||||||
|
0: aA
|
||||||
|
1: a
|
||||||
|
|
||||||
|
'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames
|
||||||
|
aA123\=ovector=1
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: aA
|
||||||
|
aA123\=ovector=2
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: aA
|
||||||
|
1:
|
||||||
|
aA123\=ovector=3
|
||||||
|
0: aA
|
||||||
|
1:
|
||||||
|
2: a
|
||||||
|
|
||||||
|
'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames
|
||||||
|
aa123\=ovector=1
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: aa
|
||||||
|
aa123\=ovector=2
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: aa
|
||||||
|
1: <unset>
|
||||||
|
aa123\=ovector=3
|
||||||
|
0: aa
|
||||||
|
1: <unset>
|
||||||
|
2: a
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue