Runtime UTF checks now take note of the starting offset.
This commit is contained in:
parent
1370a49dfe
commit
ee41aa906f
|
@ -145,6 +145,10 @@ was fixed.
|
||||||
39. Match limit check added to recursion. This issue was found by Karl Skomski
|
39. Match limit check added to recursion. This issue was found by Karl Skomski
|
||||||
with a custom LLVM fuzzer.
|
with a custom LLVM fuzzer.
|
||||||
|
|
||||||
|
40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look
|
||||||
|
only at the part of the subject that is relevant when the starting offset is
|
||||||
|
non-zero.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
88
RunTest
88
RunTest
|
@ -68,12 +68,13 @@ title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
|
||||||
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
|
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
|
||||||
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
|
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
|
||||||
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
|
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
|
||||||
title14="Test 14: Non-JIT limits and other non-JIT tests"
|
title14="Test 14: DFA specials for UTF and UCP support"
|
||||||
title15="Test 15: JIT-specific features when JIT is not available"
|
title15="Test 15: Non-JIT limits and other non-JIT tests"
|
||||||
title16="Test 16: JIT-specific features when JIT is available"
|
title16="Test 16: JIT-specific features when JIT is not available"
|
||||||
title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP"
|
title17="Test 17: JIT-specific features when JIT is available"
|
||||||
title18="Test 18: Tests of the POSIX interface with UTF/UCP"
|
title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
|
||||||
title19="Test 19: Serialization tests"
|
title19="Test 19: Tests of the POSIX interface with UTF/UCP"
|
||||||
|
title20="Test 20: Serialization tests"
|
||||||
maxtest=18
|
maxtest=18
|
||||||
|
|
||||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||||
|
@ -97,6 +98,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||||
echo $title17
|
echo $title17
|
||||||
echo $title18
|
echo $title18
|
||||||
echo $title19
|
echo $title19
|
||||||
|
echo $title20
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -219,6 +221,7 @@ do16=no
|
||||||
do17=no
|
do17=no
|
||||||
do18=no
|
do18=no
|
||||||
do19=no
|
do19=no
|
||||||
|
do20=no
|
||||||
|
|
||||||
while [ $# -gt 0 ] ; do
|
while [ $# -gt 0 ] ; do
|
||||||
case $1 in
|
case $1 in
|
||||||
|
@ -242,10 +245,11 @@ while [ $# -gt 0 ] ; do
|
||||||
17) do17=yes;;
|
17) do17=yes;;
|
||||||
18) do18=yes;;
|
18) do18=yes;;
|
||||||
19) do19=yes;;
|
19) do19=yes;;
|
||||||
|
20) do20=yes;;
|
||||||
-8) arg8=yes;;
|
-8) arg8=yes;;
|
||||||
-16) arg16=yes;;
|
-16) arg16=yes;;
|
||||||
-32) arg32=yes;;
|
-32) arg32=yes;;
|
||||||
bigstack|-bigstack) bigstack=yes;;
|
bigstack|-bigstack) bigstack=yes;;
|
||||||
nojit|-nojit) nojit=yes;;
|
nojit|-nojit) nojit=yes;;
|
||||||
sim|-sim) shift; sim=$1;;
|
sim|-sim) shift; sim=$1;;
|
||||||
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
|
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
|
||||||
|
@ -305,10 +309,10 @@ if [ $? -eq 0 ] ; then
|
||||||
else
|
else
|
||||||
test2stack="-S 1024"
|
test2stack="-S 1024"
|
||||||
defaultstack="-S 64"
|
defaultstack="-S 64"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
test2stack=""
|
test2stack=""
|
||||||
defaultstack=""
|
defaultstack=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
|
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
|
||||||
|
@ -387,7 +391,8 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
||||||
$do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \
|
$do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \
|
||||||
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
|
$do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \
|
||||||
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
|
$do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
|
||||||
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no \
|
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
|
||||||
|
$do20 = no \
|
||||||
]; then
|
]; then
|
||||||
do0=yes
|
do0=yes
|
||||||
do1=yes
|
do1=yes
|
||||||
|
@ -409,6 +414,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
|
||||||
do17=yes
|
do17=yes
|
||||||
do18=yes
|
do18=yes
|
||||||
do19=yes
|
do19=yes
|
||||||
|
do20=yes
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||||
|
@ -688,71 +694,79 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||||
checkresult $? 13 ""
|
checkresult $? 13 ""
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Test non-JIT match and recursion limits
|
# Tests for DFA UTF and UCP features. Output is different for the different widths.
|
||||||
|
|
||||||
if [ $do14 = yes ] ; then
|
if [ $do14 = yes ] ; then
|
||||||
echo $title14
|
echo $title14
|
||||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput14 testtry
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry
|
||||||
checkresult $? 14 ""
|
checkresult $? 14-$bits "$opt"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Test non-JIT match and recursion limits
|
||||||
|
|
||||||
|
if [ $do15 = yes ] ; then
|
||||||
|
echo $title15
|
||||||
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
|
||||||
|
checkresult $? 15 ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Test JIT-specific features when JIT is not available
|
# Test JIT-specific features when JIT is not available
|
||||||
|
|
||||||
if [ $do15 = yes ] ; then
|
|
||||||
echo $title15
|
|
||||||
if [ $jit -ne 0 ] ; then
|
|
||||||
echo " Skipped because JIT is available"
|
|
||||||
else
|
|
||||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
|
|
||||||
checkresult $? 15 ""
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Test JIT-specific features when JIT is available
|
|
||||||
|
|
||||||
if [ $do16 = yes ] ; then
|
if [ $do16 = yes ] ; then
|
||||||
echo $title16
|
echo $title16
|
||||||
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
if [ $jit -ne 0 ] ; then
|
||||||
echo " Skipped because JIT is not available or nojit was specified"
|
echo " Skipped because JIT is available"
|
||||||
else
|
else
|
||||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry
|
||||||
checkresult $? 16 ""
|
checkresult $? 16 ""
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Tests for the POSIX interface without UTF/UCP (8-bit only)
|
# Test JIT-specific features when JIT is available
|
||||||
|
|
||||||
if [ $do17 = yes ] ; then
|
if [ $do17 = yes ] ; then
|
||||||
echo $title17
|
echo $title17
|
||||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||||
echo " Skipped when running 16/32-bit tests"
|
echo " Skipped because JIT is not available or nojit was specified"
|
||||||
else
|
else
|
||||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry
|
||||||
checkresult $? 17 ""
|
checkresult $? 17 ""
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Tests for the POSIX interface with UTF/UCP (8-bit only)
|
# Tests for the POSIX interface without UTF/UCP (8-bit only)
|
||||||
|
|
||||||
if [ $do18 = yes ] ; then
|
if [ $do18 = yes ] ; then
|
||||||
echo $title18
|
echo $title18
|
||||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||||
echo " Skipped when running 16/32-bit tests"
|
echo " Skipped when running 16/32-bit tests"
|
||||||
elif [ $utf -eq 0 ] ; then
|
|
||||||
echo " Skipped because UTF-$bits support is not available"
|
|
||||||
else
|
else
|
||||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry
|
||||||
checkresult $? 18 ""
|
checkresult $? 18 ""
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Serialization tests
|
# Tests for the POSIX interface with UTF/UCP (8-bit only)
|
||||||
|
|
||||||
if [ $do19 = yes ] ; then
|
if [ $do19 = yes ] ; then
|
||||||
echo $title19
|
echo $title19
|
||||||
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry
|
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||||
checkresult $? 19 ""
|
echo " Skipped when running 16/32-bit tests"
|
||||||
|
elif [ $utf -eq 0 ] ; then
|
||||||
|
echo " Skipped because UTF-$bits support is not available"
|
||||||
|
else
|
||||||
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry
|
||||||
|
checkresult $? 19 ""
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Serialization tests
|
||||||
|
|
||||||
|
if [ $do20 = yes ] ; then
|
||||||
|
echo $title20
|
||||||
|
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput20 testtry
|
||||||
|
checkresult $? 20 ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# End of loop for 8/16/32-bit tests
|
# End of loop for 8/16/32-bit tests
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "29 July 2015" "PCRE2 10.21"
|
.TH PCRE2API 3 "18 August 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -2022,12 +2022,19 @@ If the pattern is anchored, such a match can occur only if the pattern contains
|
||||||
.sp
|
.sp
|
||||||
When PCRE2_UTF is set at compile time, the validity of the subject as a UTF
|
When PCRE2_UTF is set at compile time, the validity of the subject as a UTF
|
||||||
string is checked by default when \fBpcre2_match()\fP is subsequently called.
|
string is checked by default when \fBpcre2_match()\fP is subsequently called.
|
||||||
The entire string is checked before any other processing takes place, and a
|
If a non-zero starting offset is given, the check is applied only to that part
|
||||||
|
of the subject that could be inspected during matching, and there is a check
|
||||||
|
that the starting offset points to the first code unit of a character or to the
|
||||||
|
end of the subject. If there are no lookbehind assertions in the pattern, the
|
||||||
|
check starts at the starting offset. Otherwise, it starts at the length of the
|
||||||
|
longest lookbehind before the starting offset, or at the start of the subject
|
||||||
|
if there are not that many characters before the starting offset. Note that the
|
||||||
|
sequences \eb and \eB are one-character lookbehinds.
|
||||||
|
.P
|
||||||
|
The check is carried out before any other processing takes place, and a
|
||||||
negative error code is returned if the check fails. There are several UTF error
|
negative error code is returned if the check fails. There are several UTF error
|
||||||
codes for each code unit width, corresponding to different problems with the
|
codes for each code unit width, corresponding to different problems with the
|
||||||
code unit sequence. The value of \fIstartoffset\fP is also checked, to ensure
|
code unit sequence. There are discussions about the validity of
|
||||||
that it points to the start of a character or to the end of the subject. There
|
|
||||||
are discussions about the validity of
|
|
||||||
.\" HTML <a href="pcre2unicode.html#utf8strings">
|
.\" HTML <a href="pcre2unicode.html#utf8strings">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
UTF-8 strings,
|
UTF-8 strings,
|
||||||
|
@ -2939,6 +2946,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 29 July 2015
|
Last updated: 18 August 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2UNICODE 3 "23 November 2014" "PCRE2 10.00"
|
.TH PCRE2UNICODE 3 "18 August 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE - Perl-compatible regular expressions (revised API)
|
PCRE - Perl-compatible regular expressions (revised API)
|
||||||
.SH "UNICODE AND UTF SUPPORT"
|
.SH "UNICODE AND UTF SUPPORT"
|
||||||
|
@ -117,11 +117,21 @@ UTF-16 and UTF-32 strings can indicate their endianness by special code knows
|
||||||
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
||||||
strings to be in host byte order.
|
strings to be in host byte order.
|
||||||
.P
|
.P
|
||||||
The entire string is checked before any other processing takes place. In
|
A UTF string is checked before any other processing takes place. In the case of
|
||||||
addition to checking the format of the string, there is a check to ensure that
|
\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP calls with a non-zero starting
|
||||||
all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
|
offset, the check is applied only to that part of the subject that could be
|
||||||
The so-called "non-character" code points are not excluded because Unicode
|
inspected during matching, and there is a check that the starting offset points
|
||||||
corrigendum #9 makes it clear that they should not be.
|
to the first code unit of a character or to the end of the subject. If there
|
||||||
|
are no lookbehind assertions in the pattern, the check starts at the starting
|
||||||
|
offset. Otherwise, it starts at the length of the longest lookbehind before the
|
||||||
|
starting offset, or at the start of the subject if there are not that many
|
||||||
|
characters before the starting offset. Note that the sequences \eb and \eB are
|
||||||
|
one-character lookbehinds.
|
||||||
|
.P
|
||||||
|
In addition to checking the format of the string, there is a check to ensure
|
||||||
|
that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate
|
||||||
|
area. The so-called "non-character" code points are not excluded because
|
||||||
|
Unicode corrigendum #9 makes it clear that they should not be.
|
||||||
.P
|
.P
|
||||||
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
||||||
where they are used in pairs to encode code points with values greater than
|
where they are used in pairs to encode code points with values greater than
|
||||||
|
@ -252,6 +262,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 23 November 2014
|
Last updated: 18 August 2015
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -4682,7 +4682,7 @@ for (;; ptr++)
|
||||||
that it's a length rather than a small character. */
|
that it's a length rather than a small character. */
|
||||||
|
|
||||||
#ifdef MAYBE_UTF_MULTI
|
#ifdef MAYBE_UTF_MULTI
|
||||||
if (utf && NOT_FIRSTCHAR(code[-1]))
|
if (utf && NOT_FIRSTCU(code[-1]))
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *lastchar = code - 1;
|
PCRE2_UCHAR *lastchar = code - 1;
|
||||||
BACKCHAR(lastchar);
|
BACKCHAR(lastchar);
|
||||||
|
|
|
@ -2774,7 +2774,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
PCRE2_SPTR p = start_subject + local_offsets[rc];
|
||||||
PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
|
PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
|
||||||
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (charcount > 0)
|
if (charcount > 0)
|
||||||
|
@ -2874,7 +2874,7 @@ for (;;)
|
||||||
PCRE2_SPTR pp = local_ptr;
|
PCRE2_SPTR pp = local_ptr;
|
||||||
charcount = (int)(pp - p);
|
charcount = (int)(pp - p);
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||||
#endif
|
#endif
|
||||||
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
||||||
}
|
}
|
||||||
|
@ -2960,7 +2960,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR p = start_subject + local_offsets[0];
|
PCRE2_SPTR p = start_subject + local_offsets[0];
|
||||||
PCRE2_SPTR pp = start_subject + local_offsets[1];
|
PCRE2_SPTR pp = start_subject + local_offsets[1];
|
||||||
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
|
while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
|
||||||
|
@ -3264,18 +3264,50 @@ switch(re->newline_convention)
|
||||||
|
|
||||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||||
we must also check that a starting offset does not point into the middle of a
|
we must also check that a starting offset does not point into the middle of a
|
||||||
multiunit character. */
|
multiunit character. We check only the portion of the subject that is going to
|
||||||
|
be inspected during matching - from the offset minus the maximum lookbehind
|
||||||
|
to the given length. This saves time when a small part of a large subject is
|
||||||
|
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||||
|
is a number of characters, not code units. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
{
|
{
|
||||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar));
|
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||||
if (match_data->rc != 0) return match_data->rc;
|
|
||||||
|
if (start_offset > 0)
|
||||||
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (start_offset > 0 && start_offset < length &&
|
unsigned int i;
|
||||||
NOT_FIRSTCHAR(subject[start_offset]))
|
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||||
return PCRE2_ERROR_BADUTFOFFSET;
|
return PCRE2_ERROR_BADUTFOFFSET;
|
||||||
|
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||||
|
{
|
||||||
|
check_subject--;
|
||||||
|
while (check_subject > subject &&
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
(*check_subject & 0xc0) == 0x80)
|
||||||
|
#else /* 16-bit */
|
||||||
|
(*check_subject & 0xfc00) == 0xdc00)
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
check_subject--;
|
||||||
|
}
|
||||||
|
#else /* In the 32-bit library, one code unit equals one character. */
|
||||||
|
check_subject -= re->max_lookbehind;
|
||||||
|
if (check_subject < subject) check_subject = subject;
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||||
|
offset to be an absolute offset in the whole string. */
|
||||||
|
|
||||||
|
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||||
|
length - (check_subject - subject), &(match_data->startchar));
|
||||||
|
if (match_data->rc != 0)
|
||||||
|
{
|
||||||
|
match_data->startchar += check_subject - subject;
|
||||||
|
return match_data->rc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
|
|
@ -72,7 +72,7 @@ just to undefine them all. */
|
||||||
#undef MAX_MARK
|
#undef MAX_MARK
|
||||||
#undef MAX_PATTERN_SIZE
|
#undef MAX_PATTERN_SIZE
|
||||||
#undef MAX_UTF_SINGLE_CU
|
#undef MAX_UTF_SINGLE_CU
|
||||||
#undef NOT_FIRSTCHAR
|
#undef NOT_FIRSTCU
|
||||||
#undef PUT
|
#undef PUT
|
||||||
#undef PUT2
|
#undef PUT2
|
||||||
#undef PUT2INC
|
#undef PUT2INC
|
||||||
|
@ -252,7 +252,7 @@ UTF support is omitted, we don't even define them. */
|
||||||
/* #define MAX_UTF_SINGLE_CU */
|
/* #define MAX_UTF_SINGLE_CU */
|
||||||
/* #define HAS_EXTRALEN(c) */
|
/* #define HAS_EXTRALEN(c) */
|
||||||
/* #define GET_EXTRALEN(c) */
|
/* #define GET_EXTRALEN(c) */
|
||||||
/* #define NOT_FIRSTCHAR(c) */
|
/* #define NOT_FIRSTCU(c) */
|
||||||
#define GETCHAR(c, eptr) c = *eptr;
|
#define GETCHAR(c, eptr) c = *eptr;
|
||||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||||
|
@ -285,10 +285,10 @@ Otherwise it has an undefined behaviour. */
|
||||||
|
|
||||||
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
|
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
|
||||||
|
|
||||||
/* Returns TRUE, if the given character is not the first character
|
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||||
of a UTF sequence. */
|
sequence. */
|
||||||
|
|
||||||
#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
|
#define NOT_FIRSTCU(c) (((c) & 0xc0) == 0x80)
|
||||||
|
|
||||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||||
we know we are in UTF-8 mode. */
|
we know we are in UTF-8 mode. */
|
||||||
|
@ -371,10 +371,10 @@ Otherwise it has an undefined behaviour. */
|
||||||
|
|
||||||
#define GET_EXTRALEN(c) 1
|
#define GET_EXTRALEN(c) 1
|
||||||
|
|
||||||
/* Returns TRUE, if the given character is not the first character
|
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||||
of a UTF sequence. */
|
sequence. */
|
||||||
|
|
||||||
#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
|
#define NOT_FIRSTCU(c) (((c) & 0xfc00) == 0xdc00)
|
||||||
|
|
||||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||||
advancing the pointer. */
|
advancing the pointer. */
|
||||||
|
@ -469,7 +469,7 @@ into one PCRE2_UCHAR unit. */
|
||||||
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
||||||
#define HAS_EXTRALEN(c) (0)
|
#define HAS_EXTRALEN(c) (0)
|
||||||
#define GET_EXTRALEN(c) (0)
|
#define GET_EXTRALEN(c) (0)
|
||||||
#define NOT_FIRSTCHAR(c) (0)
|
#define NOT_FIRSTCU(c) (0)
|
||||||
|
|
||||||
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
||||||
we know we are in UTF-32 mode. */
|
we know we are in UTF-32 mode. */
|
||||||
|
|
|
@ -6485,6 +6485,7 @@ mb->match_frames_base = &frame_zero;
|
||||||
subject string. */
|
subject string. */
|
||||||
|
|
||||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||||
|
end_subject = subject + length;
|
||||||
|
|
||||||
/* Plausibility checks */
|
/* Plausibility checks */
|
||||||
|
|
||||||
|
@ -6536,18 +6537,50 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
|
||||||
|
|
||||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||||
we must also check that a starting offset does not point into the middle of a
|
we must also check that a starting offset does not point into the middle of a
|
||||||
multiunit character. */
|
multiunit character. We check only the portion of the subject that is going to
|
||||||
|
be inspected during matching - from the offset minus the maximum lookbehind
|
||||||
|
to the given length. This saves time when a small part of a large subject is
|
||||||
|
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||||
|
is a number of characters, not code units. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
{
|
{
|
||||||
match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar));
|
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||||
if (match_data->rc != 0) return match_data->rc;
|
|
||||||
|
if (start_offset > 0)
|
||||||
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (start_offset > 0 && start_offset < length &&
|
unsigned int i;
|
||||||
NOT_FIRSTCHAR(subject[start_offset]))
|
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||||
return PCRE2_ERROR_BADUTFOFFSET;
|
return PCRE2_ERROR_BADUTFOFFSET;
|
||||||
|
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||||
|
{
|
||||||
|
check_subject--;
|
||||||
|
while (check_subject > subject &&
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
(*check_subject & 0xc0) == 0x80)
|
||||||
|
#else /* 16-bit */
|
||||||
|
(*check_subject & 0xfc00) == 0xdc00)
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
check_subject--;
|
||||||
|
}
|
||||||
|
#else /* In the 32-bit library, one code unit equals one character. */
|
||||||
|
check_subject -= re->max_lookbehind;
|
||||||
|
if (check_subject < subject) check_subject = subject;
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||||
|
offset to be an absolute offset in the whole string. */
|
||||||
|
|
||||||
|
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||||
|
length - (check_subject - subject), &(match_data->startchar));
|
||||||
|
if (match_data->rc != 0)
|
||||||
|
{
|
||||||
|
match_data->startchar += check_subject - subject;
|
||||||
|
return match_data->rc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
@ -6594,7 +6627,7 @@ else
|
||||||
|
|
||||||
mb->start_subject = subject;
|
mb->start_subject = subject;
|
||||||
mb->start_offset = start_offset;
|
mb->start_offset = start_offset;
|
||||||
mb->end_subject = end_subject = mb->start_subject + length;
|
mb->end_subject = end_subject;
|
||||||
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
|
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
|
||||||
|
|
||||||
mb->moptions = options; /* Match options */
|
mb->moptions = options; /* Match options */
|
||||||
|
|
|
@ -132,7 +132,36 @@
|
||||||
\xf9\x87\x80\x80\x80\=no_utf_check
|
\xf9\x87\x80\x80\x80\=no_utf_check
|
||||||
\xfc\x84\x80\x80\x80\x80\=no_utf_check
|
\xfc\x84\x80\x80\x80\x80\=no_utf_check
|
||||||
\xfd\x83\x80\x80\x80\x80\=no_utf_check
|
\xfd\x83\x80\x80\x80\x80\=no_utf_check
|
||||||
|
|
||||||
|
# Similar tests with offsets
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
|
||||||
|
/(?<=x)badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
X\xdfabcd\=offset=3
|
||||||
|
X\xdfabcd\xdf\=offset=3
|
||||||
|
|
||||||
|
/(?<=xx)badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
X\xdfabcd\=offset=3
|
||||||
|
|
||||||
|
/(?<=xxxx)badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
X\xdfabcd\=offset=3
|
||||||
|
X\xdfabcd\=offset=6
|
||||||
|
X\xdfabc\xdf\=offset=6
|
||||||
|
X\xdfabc\xdf\=offset=7
|
||||||
|
|
||||||
/\x{100}/IB,utf
|
/\x{100}/IB,utf
|
||||||
|
|
||||||
/\x{1000}/IB,utf
|
/\x{1000}/IB,utf
|
||||||
|
|
|
@ -158,6 +158,7 @@
|
||||||
|
|
||||||
/X/utf
|
/X/utf
|
||||||
XX\x{d800}
|
XX\x{d800}
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
XX\x{d800}\=no_utf_check
|
XX\x{d800}\=no_utf_check
|
||||||
XX\x{da00}
|
XX\x{da00}
|
||||||
XX\x{da00}\=no_utf_check
|
XX\x{da00}\=no_utf_check
|
||||||
|
@ -169,6 +170,9 @@
|
||||||
XX\x{dfff}\=no_utf_check
|
XX\x{dfff}\=no_utf_check
|
||||||
XX\x{110000}
|
XX\x{110000}
|
||||||
XX\x{d800}\x{1234}
|
XX\x{d800}\x{1234}
|
||||||
|
|
||||||
|
/(?<=.)X/utf
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
|
||||||
/(*UTF16)\x{11234}/
|
/(*UTF16)\x{11234}/
|
||||||
abcd\x{11234}pqr
|
abcd\x{11234}pqr
|
||||||
|
|
|
@ -1,155 +1,37 @@
|
||||||
# These are:
|
# These test special (mostly error) UTF features of DFA matching. They are a
|
||||||
#
|
# selection of the more comprehensive tests that are run for non-DFA matching.
|
||||||
# (1) Tests of the match-limiting features. The results are different for
|
# The output is different for the different widths.
|
||||||
# interpretive or JIT matching, so this test should not be run with JIT. The
|
|
||||||
# same tests are run using JIT in test 16.
|
|
||||||
|
|
||||||
# (2) Other tests that must not be run with JIT.
|
#subject dfa
|
||||||
|
|
||||||
/(a+)*zz/I
|
/X/utf
|
||||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
XX\x{d800}
|
||||||
aaaaaaaaaaaaaz\=find_limits
|
XX\x{d800}\=offset=3
|
||||||
|
XX\x{d800}\=no_utf_check
|
||||||
|
XX\x{da00}
|
||||||
|
XX\x{da00}\=no_utf_check
|
||||||
|
XX\x{dc00}
|
||||||
|
XX\x{dc00}\=no_utf_check
|
||||||
|
XX\x{de00}
|
||||||
|
XX\x{de00}\=no_utf_check
|
||||||
|
XX\x{dfff}
|
||||||
|
XX\x{dfff}\=no_utf_check
|
||||||
|
XX\x{110000}
|
||||||
|
XX\x{d800}\x{1234}
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
X\xdf
|
||||||
|
XX\xef
|
||||||
|
XXX\xef\x80
|
||||||
|
X\xf7
|
||||||
|
XX\xf7\x80
|
||||||
|
XXX\xf7\x80\x80
|
||||||
|
|
||||||
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
/shortutf/utf
|
||||||
/* this is a C style comment */\=find_limits
|
XX\xdf\=ph
|
||||||
|
XX\xef\=ph
|
||||||
/^(?>a)++/
|
XX\xef\x80\=ph
|
||||||
aa\=find_limits
|
\xf7\=ph
|
||||||
aaaaaaaaa\=find_limits
|
\xf7\x80\=ph
|
||||||
|
|
||||||
/(a)(?1)++/
|
|
||||||
aa\=find_limits
|
|
||||||
aaaaaaaaa\=find_limits
|
|
||||||
|
|
||||||
/a(?:.)*?a/ims
|
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
|
||||||
|
|
||||||
/a(?:.(*THEN))*?a/ims
|
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
|
||||||
|
|
||||||
/a(?:.(*THEN:ABC))*?a/ims
|
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
|
||||||
|
|
||||||
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
|
||||||
aabbccddee\=find_limits
|
|
||||||
|
|
||||||
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
|
||||||
aabbccddee\=find_limits
|
|
||||||
|
|
||||||
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
|
||||||
aabbccddee\=find_limits
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=12bc)abc/
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=4294967290)abc/
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=4294967280)abc/I
|
|
||||||
|
|
||||||
/(a+)*zz/
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
aaaaaaaaaaaaaz\=match_limit=3000
|
|
||||||
|
|
||||||
/(a+)*zz/
|
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=10
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=3000)(a+)*zz/I
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
aaaaaaaaaaaaaz\=match_limit=60000
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=60000)(a+)*zz/I
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
aaaaaaaaaaaaaz\=match_limit=3000
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=10)(a+)*zz/I
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=1000
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=1000)(a+)*zz/I
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=10
|
|
||||||
|
|
||||||
# These three have infinitely nested recursions.
|
|
||||||
|
|
||||||
/((?2))((?1))/
|
|
||||||
abc
|
|
||||||
|
|
||||||
/((?(R2)a+|(?1)b))/
|
|
||||||
aaaabcde
|
|
||||||
|
|
||||||
/(?(R)a*(?1)|((?R))b)/
|
|
||||||
aaaabcde
|
|
||||||
|
|
||||||
# The allusedtext modifier does not work with JIT, which does not maintain
|
|
||||||
# the leftchar/rightchar data.
|
|
||||||
|
|
||||||
/abc(?=xyz)/allusedtext
|
|
||||||
abcxyzpqr
|
|
||||||
abcxyzpqr\=aftertext
|
|
||||||
|
|
||||||
/(?<=pqr)abc(?=xyz)/allusedtext
|
|
||||||
xyzpqrabcxyzpqr
|
|
||||||
xyzpqrabcxyzpqr\=aftertext
|
|
||||||
|
|
||||||
/a\b/
|
|
||||||
a.\=allusedtext
|
|
||||||
a\=allusedtext
|
|
||||||
|
|
||||||
/abc\Kxyz/
|
|
||||||
abcxyz\=allusedtext
|
|
||||||
|
|
||||||
/abc(?=xyz(*ACCEPT))/
|
|
||||||
abcxyz\=allusedtext
|
|
||||||
|
|
||||||
/abc(?=abcde)(?=ab)/allusedtext
|
|
||||||
abcabcdefg
|
|
||||||
|
|
||||||
# These tests provoke recursion loops, which give a different error message
|
|
||||||
# when JIT is used.
|
|
||||||
|
|
||||||
/(?R)/I
|
|
||||||
abcd
|
|
||||||
|
|
||||||
/(a|(?R))/I
|
|
||||||
abcd
|
|
||||||
defg
|
|
||||||
|
|
||||||
/(ab|(bc|(de|(?R))))/I
|
|
||||||
abcd
|
|
||||||
fghi
|
|
||||||
|
|
||||||
/(ab|(bc|(de|(?1))))/I
|
|
||||||
abcd
|
|
||||||
fghi
|
|
||||||
|
|
||||||
/x(ab|(bc|(de|(?1)x)x)x)/I
|
|
||||||
xab123
|
|
||||||
xfghi
|
|
||||||
|
|
||||||
/(?!\w)(?R)/
|
|
||||||
abcd
|
|
||||||
=abc
|
|
||||||
|
|
||||||
/(?=\w)(?R)/
|
|
||||||
=abc
|
|
||||||
abcd
|
|
||||||
|
|
||||||
/(?<!\w)(?R)/
|
|
||||||
abcd
|
|
||||||
|
|
||||||
/(?<=\w)(?R)/
|
|
||||||
abcd
|
|
||||||
|
|
||||||
/(a+|(?R)b)/
|
|
||||||
aaa
|
|
||||||
bbb
|
|
||||||
|
|
||||||
/[^\xff]((?1))/BI
|
|
||||||
abcd
|
|
||||||
|
|
||||||
# End of testinput14
|
# End of testinput14
|
||||||
|
|
|
@ -1,9 +1,155 @@
|
||||||
# This test is run only when JIT support is not available. It checks that an
|
# These are:
|
||||||
# attempt to use it has the expected behaviour. It also tests things that
|
#
|
||||||
# are different without JIT.
|
# (1) Tests of the match-limiting features. The results are different for
|
||||||
|
# interpretive or JIT matching, so this test should not be run with JIT. The
|
||||||
|
# same tests are run using JIT in test 17.
|
||||||
|
|
||||||
/abc/I,jit,jitverify
|
# (2) Other tests that must not be run with JIT.
|
||||||
|
|
||||||
/a*/I
|
/(a+)*zz/I
|
||||||
|
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
||||||
|
aaaaaaaaaaaaaz\=find_limits
|
||||||
|
|
||||||
|
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
||||||
|
/* this is a C style comment */\=find_limits
|
||||||
|
|
||||||
|
/^(?>a)++/
|
||||||
|
aa\=find_limits
|
||||||
|
aaaaaaaaa\=find_limits
|
||||||
|
|
||||||
|
/(a)(?1)++/
|
||||||
|
aa\=find_limits
|
||||||
|
aaaaaaaaa\=find_limits
|
||||||
|
|
||||||
|
/a(?:.)*?a/ims
|
||||||
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
|
||||||
|
/a(?:.(*THEN))*?a/ims
|
||||||
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
|
||||||
|
/a(?:.(*THEN:ABC))*?a/ims
|
||||||
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
|
||||||
|
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
||||||
|
aabbccddee\=find_limits
|
||||||
|
|
||||||
|
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
||||||
|
aabbccddee\=find_limits
|
||||||
|
|
||||||
|
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
||||||
|
aabbccddee\=find_limits
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=12bc)abc/
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=4294967290)abc/
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=4294967280)abc/I
|
||||||
|
|
||||||
|
/(a+)*zz/
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
aaaaaaaaaaaaaz\=match_limit=3000
|
||||||
|
|
||||||
|
/(a+)*zz/
|
||||||
|
aaaaaaaaaaaaaz\=recursion_limit=10
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=3000)(a+)*zz/I
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
aaaaaaaaaaaaaz\=match_limit=60000
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=60000)(a+)*zz/I
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
aaaaaaaaaaaaaz\=match_limit=3000
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=10)(a+)*zz/I
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
aaaaaaaaaaaaaz\=recursion_limit=1000
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=1000)(a+)*zz/I
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
aaaaaaaaaaaaaz\=recursion_limit=10
|
||||||
|
|
||||||
|
# These three have infinitely nested recursions.
|
||||||
|
|
||||||
|
/((?2))((?1))/
|
||||||
|
abc
|
||||||
|
|
||||||
|
/((?(R2)a+|(?1)b))/
|
||||||
|
aaaabcde
|
||||||
|
|
||||||
|
/(?(R)a*(?1)|((?R))b)/
|
||||||
|
aaaabcde
|
||||||
|
|
||||||
|
# The allusedtext modifier does not work with JIT, which does not maintain
|
||||||
|
# the leftchar/rightchar data.
|
||||||
|
|
||||||
|
/abc(?=xyz)/allusedtext
|
||||||
|
abcxyzpqr
|
||||||
|
abcxyzpqr\=aftertext
|
||||||
|
|
||||||
|
/(?<=pqr)abc(?=xyz)/allusedtext
|
||||||
|
xyzpqrabcxyzpqr
|
||||||
|
xyzpqrabcxyzpqr\=aftertext
|
||||||
|
|
||||||
|
/a\b/
|
||||||
|
a.\=allusedtext
|
||||||
|
a\=allusedtext
|
||||||
|
|
||||||
|
/abc\Kxyz/
|
||||||
|
abcxyz\=allusedtext
|
||||||
|
|
||||||
|
/abc(?=xyz(*ACCEPT))/
|
||||||
|
abcxyz\=allusedtext
|
||||||
|
|
||||||
|
/abc(?=abcde)(?=ab)/allusedtext
|
||||||
|
abcabcdefg
|
||||||
|
|
||||||
|
# These tests provoke recursion loops, which give a different error message
|
||||||
|
# when JIT is used.
|
||||||
|
|
||||||
|
/(?R)/I
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/(a|(?R))/I
|
||||||
|
abcd
|
||||||
|
defg
|
||||||
|
|
||||||
|
/(ab|(bc|(de|(?R))))/I
|
||||||
|
abcd
|
||||||
|
fghi
|
||||||
|
|
||||||
|
/(ab|(bc|(de|(?1))))/I
|
||||||
|
abcd
|
||||||
|
fghi
|
||||||
|
|
||||||
|
/x(ab|(bc|(de|(?1)x)x)x)/I
|
||||||
|
xab123
|
||||||
|
xfghi
|
||||||
|
|
||||||
|
/(?!\w)(?R)/
|
||||||
|
abcd
|
||||||
|
=abc
|
||||||
|
|
||||||
|
/(?=\w)(?R)/
|
||||||
|
=abc
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/(?<!\w)(?R)/
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/(?<=\w)(?R)/
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/(a+|(?R)b)/
|
||||||
|
aaa
|
||||||
|
bbb
|
||||||
|
|
||||||
|
/[^\xff]((?1))/BI
|
||||||
|
abcd
|
||||||
|
|
||||||
# End of testinput15
|
# End of testinput15
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,17 +1,95 @@
|
||||||
# This set of tests is run only with the 8-bit library. It tests the POSIX
|
# This set of tests is run only with the 8-bit library. It tests the POSIX
|
||||||
# interface with UTF/UCP support, which is supported only with the 8-bit
|
# interface, which is supported only with the 8-bit library. This test should
|
||||||
# library. This test should not be run with JIT (which is not available for the
|
# not be run with JIT (which is not available for the POSIX interface).
|
||||||
# POSIX interface).
|
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
#pattern posix
|
#pattern posix
|
||||||
|
|
||||||
/a\x{1234}b/utf
|
# Test invalid options
|
||||||
a\x{1234}b
|
|
||||||
|
|
||||||
/\w/
|
/abc/auto_callout
|
||||||
+++\x{c2}
|
|
||||||
|
|
||||||
/\w/ucp
|
/abc/
|
||||||
+++\x{c2}
|
abc\=find_limits
|
||||||
|
|
||||||
# End of testdata/testinput17
|
/abc/
|
||||||
|
abc\=partial_hard
|
||||||
|
|
||||||
|
# Real tests
|
||||||
|
|
||||||
|
/abc/
|
||||||
|
abc
|
||||||
|
*** Failers
|
||||||
|
|
||||||
|
/^abc|def/
|
||||||
|
abcdef
|
||||||
|
abcdef\=notbol
|
||||||
|
|
||||||
|
/.*((abc)$|(def))/
|
||||||
|
defabc
|
||||||
|
defabc\=noteol
|
||||||
|
|
||||||
|
/the quick brown fox/
|
||||||
|
the quick brown fox
|
||||||
|
*** Failers
|
||||||
|
The Quick Brown Fox
|
||||||
|
|
||||||
|
/the quick brown fox/i
|
||||||
|
the quick brown fox
|
||||||
|
The Quick Brown Fox
|
||||||
|
|
||||||
|
/abc.def/
|
||||||
|
*** Failers
|
||||||
|
abc\ndef
|
||||||
|
|
||||||
|
/abc$/
|
||||||
|
abc
|
||||||
|
abc\n
|
||||||
|
|
||||||
|
/(abc)\2/
|
||||||
|
|
||||||
|
/(abc\1)/
|
||||||
|
abc
|
||||||
|
|
||||||
|
/a*(b+)(z)(z)/
|
||||||
|
aaaabbbbzzzz
|
||||||
|
aaaabbbbzzzz\=ovector=0
|
||||||
|
aaaabbbbzzzz\=ovector=1
|
||||||
|
aaaabbbbzzzz\=ovector=2
|
||||||
|
|
||||||
|
/ab.cd/
|
||||||
|
ab-cd
|
||||||
|
ab=cd
|
||||||
|
** Failers
|
||||||
|
ab\ncd
|
||||||
|
|
||||||
|
/ab.cd/s
|
||||||
|
ab-cd
|
||||||
|
ab=cd
|
||||||
|
ab\ncd
|
||||||
|
|
||||||
|
/a(b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
|
||||||
|
/a(?P<name>b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
|
||||||
|
/a?|b?/
|
||||||
|
abc
|
||||||
|
** Failers
|
||||||
|
ddd\=notempty
|
||||||
|
|
||||||
|
/\w+A/
|
||||||
|
CDAAAAB
|
||||||
|
|
||||||
|
/\w+A/ungreedy
|
||||||
|
CDAAAAB
|
||||||
|
|
||||||
|
/\Biss\B/I,aftertext
|
||||||
|
Mississippi
|
||||||
|
|
||||||
|
/abc/\
|
||||||
|
|
||||||
|
"(?(?C)"
|
||||||
|
|
||||||
|
# End of testdata/testinput18
|
||||||
|
|
|
@ -1,62 +1,17 @@
|
||||||
# This set of tests exercises the serialization/deserialization functions in
|
# This set of tests is run only with the 8-bit library. It tests the POSIX
|
||||||
# the library. It does not use UTF or JIT.
|
# interface with UTF/UCP support, which is supported only with the 8-bit
|
||||||
|
# library. This test should not be run with JIT (which is not available for the
|
||||||
#forbid_utf
|
# POSIX interface).
|
||||||
|
|
||||||
# Compile several patterns, push them onto the stack, and then write them
|
|
||||||
# all to a file.
|
|
||||||
|
|
||||||
#pattern push
|
|
||||||
|
|
||||||
/(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
|
|
||||||
(?(DEFINE)
|
|
||||||
(?<NAME_PAT>[a-z]+)
|
|
||||||
(?<ADDRESS_PAT>\d+)
|
|
||||||
)/x
|
|
||||||
/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
|
|
||||||
|
|
||||||
#save testsaved1
|
|
||||||
|
|
||||||
# Do it again for some more patterns.
|
|
||||||
|
|
||||||
/(*MARK:A)(*SKIP:B)(C|X)/mark
|
|
||||||
/(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
|
|
||||||
|
|
||||||
#save testsaved2
|
|
||||||
#pattern -push
|
|
||||||
|
|
||||||
# Reload the patterns, then pop them one by one and check them.
|
|
||||||
|
|
||||||
#load testsaved1
|
|
||||||
#load testsaved2
|
|
||||||
|
|
||||||
#pop info
|
|
||||||
foofoo
|
|
||||||
barbar
|
|
||||||
|
|
||||||
#pop mark
|
#pattern posix
|
||||||
C
|
|
||||||
D
|
/a\x{1234}b/utf
|
||||||
|
a\x{1234}b
|
||||||
|
|
||||||
|
/\w/
|
||||||
|
+++\x{c2}
|
||||||
|
|
||||||
|
/\w/ucp
|
||||||
|
+++\x{c2}
|
||||||
|
|
||||||
#pop
|
# End of testdata/testinput19
|
||||||
AmanaplanacanalPanama
|
|
||||||
|
|
||||||
#pop info
|
|
||||||
metcalfe 33
|
|
||||||
|
|
||||||
# Check for an error when different tables are used.
|
|
||||||
|
|
||||||
/abc/push,tables=1
|
|
||||||
/xyz/push,tables=2
|
|
||||||
#save testsaved1
|
|
||||||
|
|
||||||
#pop
|
|
||||||
xyz
|
|
||||||
|
|
||||||
#pop
|
|
||||||
abc
|
|
||||||
|
|
||||||
#pop should give an error
|
|
||||||
pqr
|
|
||||||
|
|
||||||
# End of testinput19
|
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
# This set of tests exercises the serialization/deserialization functions in
|
||||||
|
# the library. It does not use UTF or JIT.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
|
||||||
|
# Compile several patterns, push them onto the stack, and then write them
|
||||||
|
# all to a file.
|
||||||
|
|
||||||
|
#pattern push
|
||||||
|
|
||||||
|
/(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
|
||||||
|
(?(DEFINE)
|
||||||
|
(?<NAME_PAT>[a-z]+)
|
||||||
|
(?<ADDRESS_PAT>\d+)
|
||||||
|
)/x
|
||||||
|
/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
|
||||||
|
|
||||||
|
#save testsaved1
|
||||||
|
|
||||||
|
# Do it again for some more patterns.
|
||||||
|
|
||||||
|
/(*MARK:A)(*SKIP:B)(C|X)/mark
|
||||||
|
/(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
|
||||||
|
|
||||||
|
#save testsaved2
|
||||||
|
#pattern -push
|
||||||
|
|
||||||
|
# Reload the patterns, then pop them one by one and check them.
|
||||||
|
|
||||||
|
#load testsaved1
|
||||||
|
#load testsaved2
|
||||||
|
|
||||||
|
#pop info
|
||||||
|
foofoo
|
||||||
|
barbar
|
||||||
|
|
||||||
|
#pop mark
|
||||||
|
C
|
||||||
|
D
|
||||||
|
|
||||||
|
#pop
|
||||||
|
AmanaplanacanalPanama
|
||||||
|
|
||||||
|
#pop info
|
||||||
|
metcalfe 33
|
||||||
|
|
||||||
|
# Check for an error when different tables are used.
|
||||||
|
|
||||||
|
/abc/push,tables=1
|
||||||
|
/xyz/push,tables=2
|
||||||
|
#save testsaved1
|
||||||
|
|
||||||
|
#pop
|
||||||
|
xyz
|
||||||
|
|
||||||
|
#pop
|
||||||
|
abc
|
||||||
|
|
||||||
|
#pop should give an error
|
||||||
|
pqr
|
||||||
|
|
||||||
|
# End of testinput20
|
|
@ -235,7 +235,55 @@ No match
|
||||||
No match
|
No match
|
||||||
\xfd\x83\x80\x80\x80\x80\=no_utf_check
|
\xfd\x83\x80\x80\x80\x80\=no_utf_check
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
# Similar tests with offsets
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
No match
|
||||||
|
|
||||||
|
/(?<=x)badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=3
|
||||||
|
No match
|
||||||
|
X\xdfabcd\xdf\=offset=3
|
||||||
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
|
||||||
|
|
||||||
|
/(?<=xx)badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=3
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
|
||||||
|
/(?<=xxxx)badutf/utf
|
||||||
|
X\xdfabcd
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=1
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=2
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=3
|
||||||
|
Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
|
||||||
|
X\xdfabcd\=offset=6
|
||||||
|
No match
|
||||||
|
X\xdfabc\xdf\=offset=6
|
||||||
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
|
||||||
|
X\xdfabc\xdf\=offset=7
|
||||||
|
Failed: error -33: bad offset value
|
||||||
|
|
||||||
/\x{100}/IB,utf
|
/\x{100}/IB,utf
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
|
|
|
@ -609,6 +609,8 @@ Failed: error 106 at offset 13: missing terminating ] for character class
|
||||||
/X/utf
|
/X/utf
|
||||||
XX\x{d800}
|
XX\x{d800}
|
||||||
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
|
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
No match
|
||||||
XX\x{d800}\=no_utf_check
|
XX\x{d800}\=no_utf_check
|
||||||
0: X
|
0: X
|
||||||
XX\x{da00}
|
XX\x{da00}
|
||||||
|
@ -631,6 +633,10 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||||||
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||||||
XX\x{d800}\x{1234}
|
XX\x{d800}\x{1234}
|
||||||
Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
|
Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
|
||||||
|
|
||||||
|
/(?<=.)X/utf
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
|
||||||
|
|
||||||
/(*UTF16)\x{11234}/
|
/(*UTF16)\x{11234}/
|
||||||
abcd\x{11234}pqr
|
abcd\x{11234}pqr
|
||||||
|
|
|
@ -602,6 +602,8 @@ Failed: error 106 at offset 13: missing terminating ] for character class
|
||||||
/X/utf
|
/X/utf
|
||||||
XX\x{d800}
|
XX\x{d800}
|
||||||
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
No match
|
||||||
XX\x{d800}\=no_utf_check
|
XX\x{d800}\=no_utf_check
|
||||||
0: X
|
0: X
|
||||||
XX\x{da00}
|
XX\x{da00}
|
||||||
|
@ -624,6 +626,10 @@ Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at of
|
||||||
Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
|
Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
|
||||||
XX\x{d800}\x{1234}
|
XX\x{d800}\x{1234}
|
||||||
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
|
||||||
|
/(?<=.)X/utf
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
|
||||||
/(*UTF16)\x{11234}/
|
/(*UTF16)\x{11234}/
|
||||||
Failed: error 160 at offset 5: (*VERB) not recognized or malformed
|
Failed: error 160 at offset 5: (*VERB) not recognized or malformed
|
||||||
|
|
|
@ -1,334 +0,0 @@
|
||||||
# These are:
|
|
||||||
#
|
|
||||||
# (1) Tests of the match-limiting features. The results are different for
|
|
||||||
# interpretive or JIT matching, so this test should not be run with JIT. The
|
|
||||||
# same tests are run using JIT in test 16.
|
|
||||||
|
|
||||||
# (2) Other tests that must not be run with JIT.
|
|
||||||
|
|
||||||
/(a+)*zz/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Starting code units: a z
|
|
||||||
Last code unit = 'z'
|
|
||||||
Subject length lower bound = 2
|
|
||||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
|
||||||
Minimum match limit = 8
|
|
||||||
Minimum recursion limit = 6
|
|
||||||
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
|
||||||
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
|
||||||
aaaaaaaaaaaaaz\=find_limits
|
|
||||||
Minimum match limit = 32768
|
|
||||||
Minimum recursion limit = 29
|
|
||||||
No match
|
|
||||||
|
|
||||||
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
May match empty string
|
|
||||||
Subject length lower bound = 0
|
|
||||||
/* this is a C style comment */\=find_limits
|
|
||||||
Minimum match limit = 120
|
|
||||||
Minimum recursion limit = 6
|
|
||||||
0: /* this is a C style comment */
|
|
||||||
1: /* this is a C style comment */
|
|
||||||
|
|
||||||
/^(?>a)++/
|
|
||||||
aa\=find_limits
|
|
||||||
Minimum match limit = 5
|
|
||||||
Minimum recursion limit = 2
|
|
||||||
0: aa
|
|
||||||
aaaaaaaaa\=find_limits
|
|
||||||
Minimum match limit = 12
|
|
||||||
Minimum recursion limit = 2
|
|
||||||
0: aaaaaaaaa
|
|
||||||
|
|
||||||
/(a)(?1)++/
|
|
||||||
aa\=find_limits
|
|
||||||
Minimum match limit = 7
|
|
||||||
Minimum recursion limit = 4
|
|
||||||
0: aa
|
|
||||||
1: a
|
|
||||||
aaaaaaaaa\=find_limits
|
|
||||||
Minimum match limit = 21
|
|
||||||
Minimum recursion limit = 4
|
|
||||||
0: aaaaaaaaa
|
|
||||||
1: a
|
|
||||||
|
|
||||||
/a(?:.)*?a/ims
|
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
|
||||||
Minimum match limit = 65
|
|
||||||
Minimum recursion limit = 2
|
|
||||||
0: abbbbbbbbbbbbbbbbbbbbba
|
|
||||||
|
|
||||||
/a(?:.(*THEN))*?a/ims
|
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
|
||||||
Minimum match limit = 86
|
|
||||||
Minimum recursion limit = 45
|
|
||||||
0: abbbbbbbbbbbbbbbbbbbbba
|
|
||||||
|
|
||||||
/a(?:.(*THEN:ABC))*?a/ims
|
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
|
||||||
Minimum match limit = 86
|
|
||||||
Minimum recursion limit = 45
|
|
||||||
0: abbbbbbbbbbbbbbbbbbbbba
|
|
||||||
|
|
||||||
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
|
||||||
aabbccddee\=find_limits
|
|
||||||
Minimum match limit = 7
|
|
||||||
Minimum recursion limit = 2
|
|
||||||
0: aabbccddee
|
|
||||||
|
|
||||||
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
|
||||||
aabbccddee\=find_limits
|
|
||||||
Minimum match limit = 17
|
|
||||||
Minimum recursion limit = 16
|
|
||||||
0: aabbccddee
|
|
||||||
1: aa
|
|
||||||
2: bb
|
|
||||||
3: cc
|
|
||||||
4: dd
|
|
||||||
5: ee
|
|
||||||
|
|
||||||
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
|
||||||
aabbccddee\=find_limits
|
|
||||||
Minimum match limit = 13
|
|
||||||
Minimum recursion limit = 10
|
|
||||||
0: aabbccddee
|
|
||||||
1: aa
|
|
||||||
2: cc
|
|
||||||
3: ee
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=12bc)abc/
|
|
||||||
Failed: error 160 at offset 17: (*VERB) not recognized or malformed
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=4294967290)abc/
|
|
||||||
Failed: error 160 at offset 24: (*VERB) not recognized or malformed
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=4294967280)abc/I
|
|
||||||
Capturing subpattern count = 0
|
|
||||||
Recursion limit = 4294967280
|
|
||||||
First code unit = 'a'
|
|
||||||
Last code unit = 'c'
|
|
||||||
Subject length lower bound = 3
|
|
||||||
|
|
||||||
/(a+)*zz/
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
No match
|
|
||||||
aaaaaaaaaaaaaz\=match_limit=3000
|
|
||||||
Failed: error -47: match limit exceeded
|
|
||||||
|
|
||||||
/(a+)*zz/
|
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=10
|
|
||||||
Failed: error -53: recursion limit exceeded
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=3000)(a+)*zz/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Match limit = 3000
|
|
||||||
Starting code units: a z
|
|
||||||
Last code unit = 'z'
|
|
||||||
Subject length lower bound = 2
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
Failed: error -47: match limit exceeded
|
|
||||||
aaaaaaaaaaaaaz\=match_limit=60000
|
|
||||||
Failed: error -47: match limit exceeded
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Match limit = 3000
|
|
||||||
Starting code units: a z
|
|
||||||
Last code unit = 'z'
|
|
||||||
Subject length lower bound = 2
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
Failed: error -47: match limit exceeded
|
|
||||||
|
|
||||||
/(*LIMIT_MATCH=60000)(a+)*zz/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Match limit = 60000
|
|
||||||
Starting code units: a z
|
|
||||||
Last code unit = 'z'
|
|
||||||
Subject length lower bound = 2
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
No match
|
|
||||||
aaaaaaaaaaaaaz\=match_limit=3000
|
|
||||||
Failed: error -47: match limit exceeded
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=10)(a+)*zz/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Recursion limit = 10
|
|
||||||
Starting code units: a z
|
|
||||||
Last code unit = 'z'
|
|
||||||
Subject length lower bound = 2
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
Failed: error -53: recursion limit exceeded
|
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=1000
|
|
||||||
Failed: error -53: recursion limit exceeded
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Recursion limit = 1000
|
|
||||||
Starting code units: a z
|
|
||||||
Last code unit = 'z'
|
|
||||||
Subject length lower bound = 2
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
No match
|
|
||||||
|
|
||||||
/(*LIMIT_RECURSION=1000)(a+)*zz/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Recursion limit = 1000
|
|
||||||
Starting code units: a z
|
|
||||||
Last code unit = 'z'
|
|
||||||
Subject length lower bound = 2
|
|
||||||
aaaaaaaaaaaaaz
|
|
||||||
No match
|
|
||||||
aaaaaaaaaaaaaz\=recursion_limit=10
|
|
||||||
Failed: error -53: recursion limit exceeded
|
|
||||||
|
|
||||||
# These three have infinitely nested recursions.
|
|
||||||
|
|
||||||
/((?2))((?1))/
|
|
||||||
abc
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/((?(R2)a+|(?1)b))/
|
|
||||||
aaaabcde
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(?(R)a*(?1)|((?R))b)/
|
|
||||||
aaaabcde
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
# The allusedtext modifier does not work with JIT, which does not maintain
|
|
||||||
# the leftchar/rightchar data.
|
|
||||||
|
|
||||||
/abc(?=xyz)/allusedtext
|
|
||||||
abcxyzpqr
|
|
||||||
0: abcxyz
|
|
||||||
>>>
|
|
||||||
abcxyzpqr\=aftertext
|
|
||||||
0: abcxyz
|
|
||||||
>>>
|
|
||||||
0+ xyzpqr
|
|
||||||
|
|
||||||
/(?<=pqr)abc(?=xyz)/allusedtext
|
|
||||||
xyzpqrabcxyzpqr
|
|
||||||
0: pqrabcxyz
|
|
||||||
<<< >>>
|
|
||||||
xyzpqrabcxyzpqr\=aftertext
|
|
||||||
0: pqrabcxyz
|
|
||||||
<<< >>>
|
|
||||||
0+ xyzpqr
|
|
||||||
|
|
||||||
/a\b/
|
|
||||||
a.\=allusedtext
|
|
||||||
0: a.
|
|
||||||
>
|
|
||||||
a\=allusedtext
|
|
||||||
0: a
|
|
||||||
|
|
||||||
/abc\Kxyz/
|
|
||||||
abcxyz\=allusedtext
|
|
||||||
0: abcxyz
|
|
||||||
<<<
|
|
||||||
|
|
||||||
/abc(?=xyz(*ACCEPT))/
|
|
||||||
abcxyz\=allusedtext
|
|
||||||
0: abcxyz
|
|
||||||
>>>
|
|
||||||
|
|
||||||
/abc(?=abcde)(?=ab)/allusedtext
|
|
||||||
abcabcdefg
|
|
||||||
0: abcabcde
|
|
||||||
>>>>>
|
|
||||||
|
|
||||||
# These tests provoke recursion loops, which give a different error message
|
|
||||||
# when JIT is used.
|
|
||||||
|
|
||||||
/(?R)/I
|
|
||||||
Capturing subpattern count = 0
|
|
||||||
May match empty string
|
|
||||||
Subject length lower bound = 0
|
|
||||||
abcd
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(a|(?R))/I
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
May match empty string
|
|
||||||
Subject length lower bound = 1
|
|
||||||
abcd
|
|
||||||
0: a
|
|
||||||
1: a
|
|
||||||
defg
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(ab|(bc|(de|(?R))))/I
|
|
||||||
Capturing subpattern count = 3
|
|
||||||
May match empty string
|
|
||||||
Subject length lower bound = 2
|
|
||||||
abcd
|
|
||||||
0: ab
|
|
||||||
1: ab
|
|
||||||
fghi
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(ab|(bc|(de|(?1))))/I
|
|
||||||
Capturing subpattern count = 3
|
|
||||||
May match empty string
|
|
||||||
Subject length lower bound = 2
|
|
||||||
abcd
|
|
||||||
0: ab
|
|
||||||
1: ab
|
|
||||||
fghi
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/x(ab|(bc|(de|(?1)x)x)x)/I
|
|
||||||
Capturing subpattern count = 3
|
|
||||||
First code unit = 'x'
|
|
||||||
Subject length lower bound = 3
|
|
||||||
xab123
|
|
||||||
0: xab
|
|
||||||
1: ab
|
|
||||||
xfghi
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(?!\w)(?R)/
|
|
||||||
abcd
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
=abc
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(?=\w)(?R)/
|
|
||||||
=abc
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
abcd
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(?<!\w)(?R)/
|
|
||||||
abcd
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(?<=\w)(?R)/
|
|
||||||
abcd
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/(a+|(?R)b)/
|
|
||||||
aaa
|
|
||||||
0: aaa
|
|
||||||
1: aaa
|
|
||||||
bbb
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
/[^\xff]((?1))/BI
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Bra
|
|
||||||
[^\x{ff}]
|
|
||||||
CBra 1
|
|
||||||
Recurse
|
|
||||||
Ket
|
|
||||||
Ket
|
|
||||||
End
|
|
||||||
------------------------------------------------------------------
|
|
||||||
Capturing subpattern count = 1
|
|
||||||
Subject length lower bound = 1
|
|
||||||
abcd
|
|
||||||
Failed: error -52: nested recursion at the same subject position
|
|
||||||
|
|
||||||
# End of testinput14
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
# These test special (mostly error) UTF features of DFA matching. They are a
|
||||||
|
# selection of the more comprehensive tests that are run for non-DFA matching.
|
||||||
|
# The output is different for the different widths.
|
||||||
|
|
||||||
|
#subject dfa
|
||||||
|
|
||||||
|
/X/utf
|
||||||
|
XX\x{d800}
|
||||||
|
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
No match
|
||||||
|
XX\x{d800}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{da00}
|
||||||
|
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
|
||||||
|
XX\x{da00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{dc00}
|
||||||
|
Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||||||
|
XX\x{dc00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{de00}
|
||||||
|
Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||||||
|
XX\x{de00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{dfff}
|
||||||
|
Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||||||
|
XX\x{dfff}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{110000}
|
||||||
|
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||||||
|
XX\x{d800}\x{1234}
|
||||||
|
Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
X\xdf
|
||||||
|
No match
|
||||||
|
XX\xef
|
||||||
|
No match
|
||||||
|
XXX\xef\x80
|
||||||
|
No match
|
||||||
|
X\xf7
|
||||||
|
No match
|
||||||
|
XX\xf7\x80
|
||||||
|
No match
|
||||||
|
XXX\xf7\x80\x80
|
||||||
|
No match
|
||||||
|
|
||||||
|
/shortutf/utf
|
||||||
|
XX\xdf\=ph
|
||||||
|
No match
|
||||||
|
XX\xef\=ph
|
||||||
|
No match
|
||||||
|
XX\xef\x80\=ph
|
||||||
|
No match
|
||||||
|
\xf7\=ph
|
||||||
|
No match
|
||||||
|
\xf7\x80\=ph
|
||||||
|
No match
|
||||||
|
|
||||||
|
# End of testinput14
|
|
@ -0,0 +1,61 @@
|
||||||
|
# These test special (mostly error) UTF features of DFA matching. They are a
|
||||||
|
# selection of the more comprehensive tests that are run for non-DFA matching.
|
||||||
|
# The output is different for the different widths.
|
||||||
|
|
||||||
|
#subject dfa
|
||||||
|
|
||||||
|
/X/utf
|
||||||
|
XX\x{d800}
|
||||||
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
No match
|
||||||
|
XX\x{d800}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{da00}
|
||||||
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{da00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{dc00}
|
||||||
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{dc00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{de00}
|
||||||
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{de00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{dfff}
|
||||||
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{dfff}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{110000}
|
||||||
|
Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
|
||||||
|
XX\x{d800}\x{1234}
|
||||||
|
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
X\xdf
|
||||||
|
No match
|
||||||
|
XX\xef
|
||||||
|
No match
|
||||||
|
XXX\xef\x80
|
||||||
|
No match
|
||||||
|
X\xf7
|
||||||
|
No match
|
||||||
|
XX\xf7\x80
|
||||||
|
No match
|
||||||
|
XXX\xf7\x80\x80
|
||||||
|
No match
|
||||||
|
|
||||||
|
/shortutf/utf
|
||||||
|
XX\xdf\=ph
|
||||||
|
No match
|
||||||
|
XX\xef\=ph
|
||||||
|
No match
|
||||||
|
XX\xef\x80\=ph
|
||||||
|
No match
|
||||||
|
\xf7\=ph
|
||||||
|
No match
|
||||||
|
\xf7\x80\=ph
|
||||||
|
No match
|
||||||
|
|
||||||
|
# End of testinput14
|
|
@ -0,0 +1,61 @@
|
||||||
|
# These test special (mostly error) UTF features of DFA matching. They are a
|
||||||
|
# selection of the more comprehensive tests that are run for non-DFA matching.
|
||||||
|
# The output is different for the different widths.
|
||||||
|
|
||||||
|
#subject dfa
|
||||||
|
|
||||||
|
/X/utf
|
||||||
|
XX\x{d800}
|
||||||
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
Error -36 (bad UTF-8 offset)
|
||||||
|
XX\x{d800}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{da00}
|
||||||
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{da00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{dc00}
|
||||||
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{dc00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{de00}
|
||||||
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{de00}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{dfff}
|
||||||
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
XX\x{dfff}\=no_utf_check
|
||||||
|
0: X
|
||||||
|
XX\x{110000}
|
||||||
|
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
|
||||||
|
XX\x{d800}\x{1234}
|
||||||
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
X\xdf
|
||||||
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
|
||||||
|
XX\xef
|
||||||
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
|
||||||
|
XXX\xef\x80
|
||||||
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
|
||||||
|
X\xf7
|
||||||
|
Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
|
||||||
|
XX\xf7\x80
|
||||||
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
|
||||||
|
XXX\xf7\x80\x80
|
||||||
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
|
||||||
|
|
||||||
|
/shortutf/utf
|
||||||
|
XX\xdf\=ph
|
||||||
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
|
||||||
|
XX\xef\=ph
|
||||||
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
|
||||||
|
XX\xef\x80\=ph
|
||||||
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
|
||||||
|
\xf7\=ph
|
||||||
|
Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
|
||||||
|
\xf7\x80\=ph
|
||||||
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
|
||||||
|
|
||||||
|
# End of testinput14
|
|
@ -1,17 +1,334 @@
|
||||||
# This test is run only when JIT support is not available. It checks that an
|
# These are:
|
||||||
# attempt to use it has the expected behaviour. It also tests things that
|
#
|
||||||
# are different without JIT.
|
# (1) Tests of the match-limiting features. The results are different for
|
||||||
|
# interpretive or JIT matching, so this test should not be run with JIT. The
|
||||||
|
# same tests are run using JIT in test 17.
|
||||||
|
|
||||||
/abc/I,jit,jitverify
|
# (2) Other tests that must not be run with JIT.
|
||||||
|
|
||||||
|
/(a+)*zz/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Starting code units: a z
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
||||||
|
Minimum match limit = 8
|
||||||
|
Minimum recursion limit = 6
|
||||||
|
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
||||||
|
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||||
|
aaaaaaaaaaaaaz\=find_limits
|
||||||
|
Minimum match limit = 32768
|
||||||
|
Minimum recursion limit = 29
|
||||||
|
No match
|
||||||
|
|
||||||
|
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
May match empty string
|
||||||
|
Subject length lower bound = 0
|
||||||
|
/* this is a C style comment */\=find_limits
|
||||||
|
Minimum match limit = 120
|
||||||
|
Minimum recursion limit = 6
|
||||||
|
0: /* this is a C style comment */
|
||||||
|
1: /* this is a C style comment */
|
||||||
|
|
||||||
|
/^(?>a)++/
|
||||||
|
aa\=find_limits
|
||||||
|
Minimum match limit = 5
|
||||||
|
Minimum recursion limit = 2
|
||||||
|
0: aa
|
||||||
|
aaaaaaaaa\=find_limits
|
||||||
|
Minimum match limit = 12
|
||||||
|
Minimum recursion limit = 2
|
||||||
|
0: aaaaaaaaa
|
||||||
|
|
||||||
|
/(a)(?1)++/
|
||||||
|
aa\=find_limits
|
||||||
|
Minimum match limit = 7
|
||||||
|
Minimum recursion limit = 4
|
||||||
|
0: aa
|
||||||
|
1: a
|
||||||
|
aaaaaaaaa\=find_limits
|
||||||
|
Minimum match limit = 21
|
||||||
|
Minimum recursion limit = 4
|
||||||
|
0: aaaaaaaaa
|
||||||
|
1: a
|
||||||
|
|
||||||
|
/a(?:.)*?a/ims
|
||||||
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
Minimum match limit = 65
|
||||||
|
Minimum recursion limit = 2
|
||||||
|
0: abbbbbbbbbbbbbbbbbbbbba
|
||||||
|
|
||||||
|
/a(?:.(*THEN))*?a/ims
|
||||||
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
Minimum match limit = 86
|
||||||
|
Minimum recursion limit = 45
|
||||||
|
0: abbbbbbbbbbbbbbbbbbbbba
|
||||||
|
|
||||||
|
/a(?:.(*THEN:ABC))*?a/ims
|
||||||
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
Minimum match limit = 86
|
||||||
|
Minimum recursion limit = 45
|
||||||
|
0: abbbbbbbbbbbbbbbbbbbbba
|
||||||
|
|
||||||
|
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
||||||
|
aabbccddee\=find_limits
|
||||||
|
Minimum match limit = 7
|
||||||
|
Minimum recursion limit = 2
|
||||||
|
0: aabbccddee
|
||||||
|
|
||||||
|
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
||||||
|
aabbccddee\=find_limits
|
||||||
|
Minimum match limit = 17
|
||||||
|
Minimum recursion limit = 16
|
||||||
|
0: aabbccddee
|
||||||
|
1: aa
|
||||||
|
2: bb
|
||||||
|
3: cc
|
||||||
|
4: dd
|
||||||
|
5: ee
|
||||||
|
|
||||||
|
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
||||||
|
aabbccddee\=find_limits
|
||||||
|
Minimum match limit = 13
|
||||||
|
Minimum recursion limit = 10
|
||||||
|
0: aabbccddee
|
||||||
|
1: aa
|
||||||
|
2: cc
|
||||||
|
3: ee
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=12bc)abc/
|
||||||
|
Failed: error 160 at offset 17: (*VERB) not recognized or malformed
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=4294967290)abc/
|
||||||
|
Failed: error 160 at offset 24: (*VERB) not recognized or malformed
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=4294967280)abc/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
|
Recursion limit = 4294967280
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Last code unit = 'c'
|
Last code unit = 'c'
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
JIT support is not available in this version of PCRE2
|
|
||||||
|
|
||||||
/a*/I
|
/(a+)*zz/
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
No match
|
||||||
|
aaaaaaaaaaaaaz\=match_limit=3000
|
||||||
|
Failed: error -47: match limit exceeded
|
||||||
|
|
||||||
|
/(a+)*zz/
|
||||||
|
aaaaaaaaaaaaaz\=recursion_limit=10
|
||||||
|
Failed: error -53: recursion limit exceeded
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=3000)(a+)*zz/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Match limit = 3000
|
||||||
|
Starting code units: a z
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
Failed: error -47: match limit exceeded
|
||||||
|
aaaaaaaaaaaaaz\=match_limit=60000
|
||||||
|
Failed: error -47: match limit exceeded
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Match limit = 3000
|
||||||
|
Starting code units: a z
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
Failed: error -47: match limit exceeded
|
||||||
|
|
||||||
|
/(*LIMIT_MATCH=60000)(a+)*zz/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Match limit = 60000
|
||||||
|
Starting code units: a z
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
No match
|
||||||
|
aaaaaaaaaaaaaz\=match_limit=3000
|
||||||
|
Failed: error -47: match limit exceeded
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=10)(a+)*zz/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Recursion limit = 10
|
||||||
|
Starting code units: a z
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
Failed: error -53: recursion limit exceeded
|
||||||
|
aaaaaaaaaaaaaz\=recursion_limit=1000
|
||||||
|
Failed: error -53: recursion limit exceeded
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Recursion limit = 1000
|
||||||
|
Starting code units: a z
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
No match
|
||||||
|
|
||||||
|
/(*LIMIT_RECURSION=1000)(a+)*zz/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Recursion limit = 1000
|
||||||
|
Starting code units: a z
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
aaaaaaaaaaaaaz
|
||||||
|
No match
|
||||||
|
aaaaaaaaaaaaaz\=recursion_limit=10
|
||||||
|
Failed: error -53: recursion limit exceeded
|
||||||
|
|
||||||
|
# These three have infinitely nested recursions.
|
||||||
|
|
||||||
|
/((?2))((?1))/
|
||||||
|
abc
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/((?(R2)a+|(?1)b))/
|
||||||
|
aaaabcde
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(?(R)a*(?1)|((?R))b)/
|
||||||
|
aaaabcde
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
# The allusedtext modifier does not work with JIT, which does not maintain
|
||||||
|
# the leftchar/rightchar data.
|
||||||
|
|
||||||
|
/abc(?=xyz)/allusedtext
|
||||||
|
abcxyzpqr
|
||||||
|
0: abcxyz
|
||||||
|
>>>
|
||||||
|
abcxyzpqr\=aftertext
|
||||||
|
0: abcxyz
|
||||||
|
>>>
|
||||||
|
0+ xyzpqr
|
||||||
|
|
||||||
|
/(?<=pqr)abc(?=xyz)/allusedtext
|
||||||
|
xyzpqrabcxyzpqr
|
||||||
|
0: pqrabcxyz
|
||||||
|
<<< >>>
|
||||||
|
xyzpqrabcxyzpqr\=aftertext
|
||||||
|
0: pqrabcxyz
|
||||||
|
<<< >>>
|
||||||
|
0+ xyzpqr
|
||||||
|
|
||||||
|
/a\b/
|
||||||
|
a.\=allusedtext
|
||||||
|
0: a.
|
||||||
|
>
|
||||||
|
a\=allusedtext
|
||||||
|
0: a
|
||||||
|
|
||||||
|
/abc\Kxyz/
|
||||||
|
abcxyz\=allusedtext
|
||||||
|
0: abcxyz
|
||||||
|
<<<
|
||||||
|
|
||||||
|
/abc(?=xyz(*ACCEPT))/
|
||||||
|
abcxyz\=allusedtext
|
||||||
|
0: abcxyz
|
||||||
|
>>>
|
||||||
|
|
||||||
|
/abc(?=abcde)(?=ab)/allusedtext
|
||||||
|
abcabcdefg
|
||||||
|
0: abcabcde
|
||||||
|
>>>>>
|
||||||
|
|
||||||
|
# These tests provoke recursion loops, which give a different error message
|
||||||
|
# when JIT is used.
|
||||||
|
|
||||||
|
/(?R)/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 0
|
||||||
|
abcd
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(a|(?R))/I
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
May match empty string
|
||||||
|
Subject length lower bound = 1
|
||||||
|
abcd
|
||||||
|
0: a
|
||||||
|
1: a
|
||||||
|
defg
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(ab|(bc|(de|(?R))))/I
|
||||||
|
Capturing subpattern count = 3
|
||||||
|
May match empty string
|
||||||
|
Subject length lower bound = 2
|
||||||
|
abcd
|
||||||
|
0: ab
|
||||||
|
1: ab
|
||||||
|
fghi
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(ab|(bc|(de|(?1))))/I
|
||||||
|
Capturing subpattern count = 3
|
||||||
|
May match empty string
|
||||||
|
Subject length lower bound = 2
|
||||||
|
abcd
|
||||||
|
0: ab
|
||||||
|
1: ab
|
||||||
|
fghi
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/x(ab|(bc|(de|(?1)x)x)x)/I
|
||||||
|
Capturing subpattern count = 3
|
||||||
|
First code unit = 'x'
|
||||||
|
Subject length lower bound = 3
|
||||||
|
xab123
|
||||||
|
0: xab
|
||||||
|
1: ab
|
||||||
|
xfghi
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(?!\w)(?R)/
|
||||||
|
abcd
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
=abc
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(?=\w)(?R)/
|
||||||
|
=abc
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
abcd
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(?<!\w)(?R)/
|
||||||
|
abcd
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(?<=\w)(?R)/
|
||||||
|
abcd
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/(a+|(?R)b)/
|
||||||
|
aaa
|
||||||
|
0: aaa
|
||||||
|
1: aaa
|
||||||
|
bbb
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
|
/[^\xff]((?1))/BI
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x{ff}]
|
||||||
|
CBra 1
|
||||||
|
Recurse
|
||||||
|
Ket
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 1
|
||||||
|
Subject length lower bound = 1
|
||||||
|
abcd
|
||||||
|
Failed: error -52: nested recursion at the same subject position
|
||||||
|
|
||||||
# End of testinput15
|
# End of testinput15
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,20 +1,148 @@
|
||||||
# This set of tests is run only with the 8-bit library. It tests the POSIX
|
# This set of tests is run only with the 8-bit library. It tests the POSIX
|
||||||
# interface with UTF/UCP support, which is supported only with the 8-bit
|
# interface, which is supported only with the 8-bit library. This test should
|
||||||
# library. This test should not be run with JIT (which is not available for the
|
# not be run with JIT (which is not available for the POSIX interface).
|
||||||
# POSIX interface).
|
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
#pattern posix
|
#pattern posix
|
||||||
|
|
||||||
/a\x{1234}b/utf
|
# Test invalid options
|
||||||
a\x{1234}b
|
|
||||||
0: a\x{1234}b
|
|
||||||
|
|
||||||
/\w/
|
/abc/auto_callout
|
||||||
+++\x{c2}
|
** Ignored with POSIX interface: auto_callout
|
||||||
|
|
||||||
|
/abc/
|
||||||
|
abc\=find_limits
|
||||||
|
** Ignored with POSIX interface: find_limits
|
||||||
|
0: abc
|
||||||
|
|
||||||
|
/abc/
|
||||||
|
abc\=partial_hard
|
||||||
|
** Ignored with POSIX interface: partial_hard
|
||||||
|
0: abc
|
||||||
|
|
||||||
|
# Real tests
|
||||||
|
|
||||||
|
/abc/
|
||||||
|
abc
|
||||||
|
0: abc
|
||||||
|
*** Failers
|
||||||
No match: POSIX code 17: match failed
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
/\w/ucp
|
/^abc|def/
|
||||||
+++\x{c2}
|
abcdef
|
||||||
0: \xc2
|
0: abc
|
||||||
|
abcdef\=notbol
|
||||||
# End of testdata/testinput17
|
0: def
|
||||||
|
|
||||||
|
/.*((abc)$|(def))/
|
||||||
|
defabc
|
||||||
|
0: defabc
|
||||||
|
1: abc
|
||||||
|
2: abc
|
||||||
|
defabc\=noteol
|
||||||
|
0: def
|
||||||
|
1: def
|
||||||
|
3: def
|
||||||
|
|
||||||
|
/the quick brown fox/
|
||||||
|
the quick brown fox
|
||||||
|
0: the quick brown fox
|
||||||
|
*** Failers
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
The Quick Brown Fox
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/the quick brown fox/i
|
||||||
|
the quick brown fox
|
||||||
|
0: the quick brown fox
|
||||||
|
The Quick Brown Fox
|
||||||
|
0: The Quick Brown Fox
|
||||||
|
|
||||||
|
/abc.def/
|
||||||
|
*** Failers
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
abc\ndef
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/abc$/
|
||||||
|
abc
|
||||||
|
0: abc
|
||||||
|
abc\n
|
||||||
|
0: abc
|
||||||
|
|
||||||
|
/(abc)\2/
|
||||||
|
Failed: POSIX code 15: bad back reference at offset 6
|
||||||
|
|
||||||
|
/(abc\1)/
|
||||||
|
abc
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/a*(b+)(z)(z)/
|
||||||
|
aaaabbbbzzzz
|
||||||
|
0: aaaabbbbzz
|
||||||
|
1: bbbb
|
||||||
|
2: z
|
||||||
|
3: z
|
||||||
|
aaaabbbbzzzz\=ovector=0
|
||||||
|
Matched without capture
|
||||||
|
aaaabbbbzzzz\=ovector=1
|
||||||
|
0: aaaabbbbzz
|
||||||
|
aaaabbbbzzzz\=ovector=2
|
||||||
|
0: aaaabbbbzz
|
||||||
|
1: bbbb
|
||||||
|
|
||||||
|
/ab.cd/
|
||||||
|
ab-cd
|
||||||
|
0: ab-cd
|
||||||
|
ab=cd
|
||||||
|
0: ab=cd
|
||||||
|
** Failers
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
ab\ncd
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/ab.cd/s
|
||||||
|
ab-cd
|
||||||
|
0: ab-cd
|
||||||
|
ab=cd
|
||||||
|
0: ab=cd
|
||||||
|
ab\ncd
|
||||||
|
0: ab\x0acd
|
||||||
|
|
||||||
|
/a(b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
Matched with REG_NOSUB
|
||||||
|
|
||||||
|
/a(?P<name>b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
Matched with REG_NOSUB
|
||||||
|
|
||||||
|
/a?|b?/
|
||||||
|
abc
|
||||||
|
0: a
|
||||||
|
** Failers
|
||||||
|
0:
|
||||||
|
ddd\=notempty
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/\w+A/
|
||||||
|
CDAAAAB
|
||||||
|
0: CDAAAA
|
||||||
|
|
||||||
|
/\w+A/ungreedy
|
||||||
|
CDAAAAB
|
||||||
|
0: CDA
|
||||||
|
|
||||||
|
/\Biss\B/I,aftertext
|
||||||
|
** Ignored with POSIX interface: info
|
||||||
|
Mississippi
|
||||||
|
0: iss
|
||||||
|
0+ issippi
|
||||||
|
|
||||||
|
/abc/\
|
||||||
|
Failed: POSIX code 9: bad escape sequence at offset 4
|
||||||
|
|
||||||
|
"(?(?C)"
|
||||||
|
Failed: POSIX code 3: pattern error at offset 2
|
||||||
|
|
||||||
|
# End of testdata/testinput18
|
||||||
|
|
|
@ -1,100 +1,20 @@
|
||||||
# This set of tests exercises the serialization/deserialization functions in
|
# This set of tests is run only with the 8-bit library. It tests the POSIX
|
||||||
# the library. It does not use UTF or JIT.
|
# interface with UTF/UCP support, which is supported only with the 8-bit
|
||||||
|
# library. This test should not be run with JIT (which is not available for the
|
||||||
#forbid_utf
|
# POSIX interface).
|
||||||
|
|
||||||
# Compile several patterns, push them onto the stack, and then write them
|
|
||||||
# all to a file.
|
|
||||||
|
|
||||||
#pattern push
|
|
||||||
|
|
||||||
/(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
|
|
||||||
(?(DEFINE)
|
|
||||||
(?<NAME_PAT>[a-z]+)
|
|
||||||
(?<ADDRESS_PAT>\d+)
|
|
||||||
)/x
|
|
||||||
/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
|
|
||||||
|
|
||||||
#save testsaved1
|
|
||||||
|
|
||||||
# Do it again for some more patterns.
|
|
||||||
|
|
||||||
/(*MARK:A)(*SKIP:B)(C|X)/mark
|
|
||||||
** Ignored when compiled pattern is stacked with 'push': mark
|
|
||||||
/(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
|
|
||||||
|
|
||||||
#save testsaved2
|
|
||||||
#pattern -push
|
|
||||||
|
|
||||||
# Reload the patterns, then pop them one by one and check them.
|
|
||||||
|
|
||||||
#load testsaved1
|
|
||||||
#load testsaved2
|
|
||||||
|
|
||||||
#pop info
|
|
||||||
Capturing subpattern count = 2
|
|
||||||
Max back reference = 2
|
|
||||||
Named capturing subpatterns:
|
|
||||||
n 1
|
|
||||||
n 2
|
|
||||||
Options: dupnames
|
|
||||||
Starting code units: b f
|
|
||||||
Subject length lower bound = 6
|
|
||||||
foofoo
|
|
||||||
0: foofoo
|
|
||||||
1: foo
|
|
||||||
barbar
|
|
||||||
0: barbar
|
|
||||||
1: <unset>
|
|
||||||
2: bar
|
|
||||||
|
|
||||||
#pop mark
|
#pattern posix
|
||||||
C
|
|
||||||
0: C
|
/a\x{1234}b/utf
|
||||||
1: C
|
a\x{1234}b
|
||||||
MK: A
|
0: a\x{1234}b
|
||||||
D
|
|
||||||
No match, mark = A
|
/\w/
|
||||||
|
+++\x{c2}
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/\w/ucp
|
||||||
|
+++\x{c2}
|
||||||
|
0: \xc2
|
||||||
|
|
||||||
#pop
|
# End of testdata/testinput19
|
||||||
AmanaplanacanalPanama
|
|
||||||
0: AmanaplanacanalPanama
|
|
||||||
1: <unset>
|
|
||||||
2: <unset>
|
|
||||||
3: AmanaplanacanalPanama
|
|
||||||
4: A
|
|
||||||
|
|
||||||
#pop info
|
|
||||||
Capturing subpattern count = 4
|
|
||||||
Named capturing subpatterns:
|
|
||||||
ADDR 2
|
|
||||||
ADDRESS_PAT 4
|
|
||||||
NAME 1
|
|
||||||
NAME_PAT 3
|
|
||||||
Options: extended
|
|
||||||
Subject length lower bound = 3
|
|
||||||
metcalfe 33
|
|
||||||
0: metcalfe 33
|
|
||||||
1: metcalfe
|
|
||||||
2: 33
|
|
||||||
|
|
||||||
# Check for an error when different tables are used.
|
|
||||||
|
|
||||||
/abc/push,tables=1
|
|
||||||
/xyz/push,tables=2
|
|
||||||
#save testsaved1
|
|
||||||
Serialization failed: error -30: patterns do not all use the same character tables
|
|
||||||
|
|
||||||
#pop
|
|
||||||
xyz
|
|
||||||
0: xyz
|
|
||||||
|
|
||||||
#pop
|
|
||||||
abc
|
|
||||||
0: abc
|
|
||||||
|
|
||||||
#pop should give an error
|
|
||||||
** Can't pop off an empty stack
|
|
||||||
pqr
|
|
||||||
|
|
||||||
# End of testinput19
|
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
# This set of tests exercises the serialization/deserialization functions in
|
||||||
|
# the library. It does not use UTF or JIT.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
|
||||||
|
# Compile several patterns, push them onto the stack, and then write them
|
||||||
|
# all to a file.
|
||||||
|
|
||||||
|
#pattern push
|
||||||
|
|
||||||
|
/(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
|
||||||
|
(?(DEFINE)
|
||||||
|
(?<NAME_PAT>[a-z]+)
|
||||||
|
(?<ADDRESS_PAT>\d+)
|
||||||
|
)/x
|
||||||
|
/^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
|
||||||
|
|
||||||
|
#save testsaved1
|
||||||
|
|
||||||
|
# Do it again for some more patterns.
|
||||||
|
|
||||||
|
/(*MARK:A)(*SKIP:B)(C|X)/mark
|
||||||
|
** Ignored when compiled pattern is stacked with 'push': mark
|
||||||
|
/(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
|
||||||
|
|
||||||
|
#save testsaved2
|
||||||
|
#pattern -push
|
||||||
|
|
||||||
|
# Reload the patterns, then pop them one by one and check them.
|
||||||
|
|
||||||
|
#load testsaved1
|
||||||
|
#load testsaved2
|
||||||
|
|
||||||
|
#pop info
|
||||||
|
Capturing subpattern count = 2
|
||||||
|
Max back reference = 2
|
||||||
|
Named capturing subpatterns:
|
||||||
|
n 1
|
||||||
|
n 2
|
||||||
|
Options: dupnames
|
||||||
|
Starting code units: b f
|
||||||
|
Subject length lower bound = 6
|
||||||
|
foofoo
|
||||||
|
0: foofoo
|
||||||
|
1: foo
|
||||||
|
barbar
|
||||||
|
0: barbar
|
||||||
|
1: <unset>
|
||||||
|
2: bar
|
||||||
|
|
||||||
|
#pop mark
|
||||||
|
C
|
||||||
|
0: C
|
||||||
|
1: C
|
||||||
|
MK: A
|
||||||
|
D
|
||||||
|
No match, mark = A
|
||||||
|
|
||||||
|
#pop
|
||||||
|
AmanaplanacanalPanama
|
||||||
|
0: AmanaplanacanalPanama
|
||||||
|
1: <unset>
|
||||||
|
2: <unset>
|
||||||
|
3: AmanaplanacanalPanama
|
||||||
|
4: A
|
||||||
|
|
||||||
|
#pop info
|
||||||
|
Capturing subpattern count = 4
|
||||||
|
Named capturing subpatterns:
|
||||||
|
ADDR 2
|
||||||
|
ADDRESS_PAT 4
|
||||||
|
NAME 1
|
||||||
|
NAME_PAT 3
|
||||||
|
Options: extended
|
||||||
|
Subject length lower bound = 3
|
||||||
|
metcalfe 33
|
||||||
|
0: metcalfe 33
|
||||||
|
1: metcalfe
|
||||||
|
2: 33
|
||||||
|
|
||||||
|
# Check for an error when different tables are used.
|
||||||
|
|
||||||
|
/abc/push,tables=1
|
||||||
|
/xyz/push,tables=2
|
||||||
|
#save testsaved1
|
||||||
|
Serialization failed: error -30: patterns do not all use the same character tables
|
||||||
|
|
||||||
|
#pop
|
||||||
|
xyz
|
||||||
|
0: xyz
|
||||||
|
|
||||||
|
#pop
|
||||||
|
abc
|
||||||
|
0: abc
|
||||||
|
|
||||||
|
#pop should give an error
|
||||||
|
** Can't pop off an empty stack
|
||||||
|
pqr
|
||||||
|
|
||||||
|
# End of testinput20
|
Loading…
Reference in New Issue