All tests except JIT and save/reload are implemented.

This commit is contained in:
Philip.Hazel 2014-08-05 16:51:32 +00:00
parent e2076960d4
commit e022475d54
29 changed files with 9495 additions and 348 deletions

481
RunTest
View File

@ -58,22 +58,18 @@ title5B=" and UCP support"
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
title7A="Test 7: DFA matching with UTF"
title7B=" and Unicode property support"
#title11="Test 11: Internal offsets and code size tests"
title8="Test 8: Internal offsets and code size tests"
title9="Test 9: Specials for the basic 8-bit library"
title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
#title12="Test 12: JIT-specific features (when JIT is available)"
#title13="Test 13: JIT-specific features (when JIT is not available)"
#title14="Test 14: Specials for the basic 8-bit library"
#title15="Test 15: Specials for the 8-bit library with UTF-8 support"
#title16="Test 16: Specials for the 8-bit library with Unicode propery support"
#title17="Test 17: Specials for the basic 16/32-bit library"
#title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
#title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
#title20="Test 20: DFA specials for the basic 16/32-bit library"
#title21="Test 21: Reloads for the basic 16/32-bit library"
#title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
#title23="Test 23: Specials for the 16-bit library"
#title24="Test 24: Specials for the 16-bit library with UTF-16 support"
#title25="Test 25: Specials for the 32-bit library"
#title26="Test 26: Specials for the 32-bit library with UTF-32 support"
maxtest=2
@ -85,12 +81,12 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title5A $title5B
echo $title6
echo $title7A $title7B
# echo $title8
# echo $title9
# echo $title10
# echo $title11
# echo $title12
# echo $title13
echo $title8
echo $title9
echo $title10
echo $title11
echo $title12
echo $title13
# echo $title14
# echo $title15
# echo $title16
@ -100,10 +96,6 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
# echo $title20
# echo $title21
# echo $title22
# echo $title23
# echo $title24
# echo $title25
# echo $title26
exit 0
fi
@ -178,12 +170,12 @@ do4=no
do5=no
do6=no
do7=no
#do8=no
#do9=no
#do10=no
#do11=no
#do12=no
#do13=no
do8=no
do9=no
do10=no
do11=no
do12=no
do13=no
#do14=no
#do15=no
#do16=no
@ -193,10 +185,6 @@ do7=no
#do20=no
#do21=no
#do22=no
#do23=no
#do24=no
#do25=no
#do26=no
while [ $# -gt 0 ] ; do
case $1 in
@ -207,12 +195,12 @@ while [ $# -gt 0 ] ; do
5) do5=yes;;
6) do6=yes;;
7) do7=yes;;
# 8) do8=yes;;
# 9) do9=yes;;
# 10) do10=yes;;
# 11) do11=yes;;
# 12) do12=yes;;
# 13) do13=yes;;
8) do8=yes;;
9) do9=yes;;
10) do10=yes;;
11) do11=yes;;
12) do12=yes;;
13) do13=yes;;
# 14) do14=yes;;
# 15) do15=yes;;
# 16) do16=yes;;
@ -222,10 +210,6 @@ while [ $# -gt 0 ] ; do
# 20) do20=yes;;
# 21) do21=yes;;
# 22) do22=yes;;
# 23) do23=yes;;
# 24) do24=yes;;
# 25) do25=yes;;
# 26) do26=yes;;
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
@ -330,7 +314,7 @@ else
fi
# UTF support always applies to all bit sizes if both are supported; we can't
# have UTF-8 support without UTF-16 support (for example).
# have UTF-8 support without UTF-16 or UTF-32 support.
$sim ./pcre2test -C utf >/dev/null
utf=$?
@ -346,14 +330,13 @@ fi
# relevant will be automatically skipped.
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
$do5 = no -a $do6 = no -a $do7 = no \
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
$do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
$do13 = no \
]; then
# -a $do8 = no -a \
# $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
# $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
# -a $do14 = no -a $do15 = no -a $do16 = no -a \
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
# $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
# $do25 = no -a $do26 = no
# $do21 = no -a $do22 = no
do1=yes
do2=yes
@ -362,12 +345,12 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
do5=yes
do6=yes
do7=yes
# do8=yes
# do9=yes
# do10=yes
# do11=yes
# do12=yes
# do13=yes
do8=yes
do9=yes
do10=yes
do11=yes
do12=yes
do13=yes
# do14=yes
# do15=yes
# do16=yes
@ -377,10 +360,6 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
# do20=yes
# do21=yes
# do22=yes
# do23=yes
# do24=yes
# do25=yes
# do26=yes
fi
# Handle any explicit skips at this stage, so that an argument list may consist
@ -584,32 +563,137 @@ if [ $do7 = yes ] ; then
fi
fi
## Test of internal offsets and code sizes. This test is run only when there
## is Unicode property support and the link size is 2. The actual tests are
## mostly the same as in some of the above, but in this test we inspect some
## offsets and sizes that require a known link size. This is a doublecheck for
## the maintainer, just in case something changes unexpectedly. The output from
## this test is not the same in 8-bit and 16-bit modes.
#
#if [ $do11 = yes ] ; then
# echo $title11
# if [ $link_size -ne 2 ] ; then
# echo " Skipped because link size is not 2"
# elif [ $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput11-$bits testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
# fi
#fi
#
# Test 8: internal offsets and code sizes. This test is run only when there
# is UTF/UCP support and the link size is 2. The actual tests are mostly the
# same as in some of the above, but in this test we inspect some offsets and
# sizes that require a known link size. This is a doublecheck for the
# maintainer, just in case something changes unexpectedly. The output from
# this test is different in 8-bit, 16-bit, and 32-bit modes, so there are
# mode-specific output files (selected by the -$bits suffix).
#
# Reads (all set outside this block): do8, title8, link_size, utf, bits, sim,
# valgrind, bmode, cf, testdata. Exits the script with status 1 if pcre2test
# returns non-zero or if the comparison command $cf returns non-zero.

if [ "$do8" = yes ] ; then
  echo $title8
  if [ "$link_size" -ne 2 ] ; then
    echo " Skipped because link size is not 2"
  elif [ "$utf" -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    # $sim, $valgrind, $bmode and $cf are deliberately left unquoted:
    # presumably they may be empty or hold a command plus arguments, so they
    # must undergo word splitting. File names are quoted.
    $sim $valgrind ./pcre2test -q $bmode "$testdata/testinput8" testtry || exit 1
    $cf "$testdata/testoutput8-$bits" testtry || exit 1
    echo " OK"
  fi
fi
# Test 9: features that are specific to the 8-bit library. Nothing is run
# when $bits is 16 or 32. Otherwise pcre2test is run once with no extra
# option and then once for each word in $jitopt; each run's output (testtry)
# is compared with testoutput9 using $cf. Any failure exits with status 1.

if [ "$do9" = yes ] ; then
  echo $title9
  case "$bits" in
    16|32)
      echo " Skipped when running 16/32-bit tests"
      ;;
    *)
      for opt in "" $jitopt; do
        $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry || exit 1
        $cf $testdata/testoutput9 testtry || exit 1
        case "$opt" in
          -jit) echo " OK with JIT" ;;
          *)    echo " OK" ;;
        esac
      done
      ;;
  esac
fi
# Test 10: UTF-8 and UCP features that are specific to the 8-bit library.
# Skipped when $bits is 16 or 32, and when $utf is 0 (UTF support not
# available). Otherwise pcre2test is run once with no extra option and then
# once for each word in $jitopt; each run's output (testtry) is compared with
# testoutput10 using $cf. Any failure exits the script with status 1.

if [ "$do10" = yes ] ; then
  echo $title10
  if [ "$bits" = "16" ] || [ "$bits" = "32" ] ; then
    echo " Skipped when running 16/32-bit tests"
  elif [ "$utf" -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    # $jitopt is unquoted on purpose so that an empty value adds no iteration;
    # $opt is unquoted so that the empty first value adds no argument.
    for opt in "" $jitopt; do
      $sim $valgrind ./pcre2test -q $bmode $opt "$testdata/testinput10" testtry || exit 1
      $cf "$testdata/testoutput10" testtry || exit 1
      if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
      else echo " OK"
      fi
    done
  fi
fi
# Test 11: features of the 16-bit and 32-bit libraries (no UTF requirement).
# Output is different for the two widths, so the expected-output file name
# carries a -$bits suffix. Skipped when $bits is 8. Otherwise pcre2test is
# run once with no extra option and then once for each word in $jitopt; any
# failure of pcre2test or of the comparison command $cf exits with status 1.

if [ "$do11" = yes ] ; then
  echo $title11
  if [ "$bits" = "8" ] ; then
    echo " Skipped when running 8-bit tests"
  else
    # $jitopt is unquoted on purpose so that an empty value adds no iteration;
    # $opt is unquoted so that the empty first value adds no argument.
    for opt in "" $jitopt; do
      $sim $valgrind ./pcre2test -q $bmode $opt "$testdata/testinput11" testtry || exit 1
      $cf "$testdata/testoutput11-$bits" testtry || exit 1
      if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
      else echo " OK"
      fi
    done
  fi
fi
# Test 12: features of the 16-bit and 32-bit libraries with UTF-16/32 and UCP
# support. Output is different for the two widths, so the expected-output
# file name carries a -$bits suffix. Skipped when $bits is 8, and when $utf
# is 0 (UTF support not available). Otherwise pcre2test is run once with no
# extra option and then once for each word in $jitopt; any failure of
# pcre2test or of the comparison command $cf exits with status 1.

if [ "$do12" = yes ] ; then
  echo $title12
  if [ "$bits" = "8" ] ; then
    echo " Skipped when running 8-bit tests"
  elif [ "$utf" -eq 0 ] ; then
    echo " Skipped because UTF-$bits support is not available"
  else
    # $jitopt is unquoted on purpose so that an empty value adds no iteration;
    # $opt is unquoted so that the empty first value adds no argument.
    for opt in "" $jitopt; do
      $sim $valgrind ./pcre2test -q $bmode $opt "$testdata/testinput12" testtry || exit 1
      $cf "$testdata/testoutput12-$bits" testtry || exit 1
      if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
      else echo " OK"
      fi
    done
  fi
fi
# Test 13: 16/32-bit-specific features in DFA non-UTF modes. Skipped when
# $bits is 8. Otherwise pcre2test is run once on testinput13 and its output
# (testtry) is compared with testoutput13 using $cf; any failure exits the
# script with status 1.
#
# NOTE(review): no DFA option is passed on the command line here, and the
# expected-output file has no -$bits suffix; presumably DFA matching is
# selected inside testinput13 and the output is width-independent - confirm.

if [ "$do13" = yes ] ; then
  echo $title13
  if [ "$bits" = "8" ] ; then
    echo " Skipped when running 8-bit tests"
  else
    # $sim, $valgrind, $bmode and $cf stay unquoted so that empty or
    # multi-word values are word-split as before; file names are quoted.
    $sim $valgrind ./pcre2test -q $bmode "$testdata/testinput13" testtry || exit 1
    $cf "$testdata/testoutput13" testtry || exit 1
    echo " OK"
  fi
fi
## Test JIT-specific features when JIT is available
#
#if [ $do12 = yes ] ; then
@ -644,169 +728,6 @@ fi
# fi
#fi
#
## Tests for 8-bit-specific features
#
#if [ "$do14" = yes ] ; then
# echo $title14
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
# echo " Skipped when running 16/32-bit tests"
# else
# cp -f $testdata/saved16 testsaved16
# cp -f $testdata/saved32 testsaved32
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput14 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput14 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 8-bit-specific features (needs UTF-8 support)
#
#if [ "$do15" = yes ] ; then
# echo $title15
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
# echo " Skipped when running 16/32-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput15 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput15 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 8-bit-specific features (Unicode property support)
#
#if [ $do16 = yes ] ; then
# echo $title16
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
# echo " Skipped when running 16/32-bit tests"
# elif [ $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput16 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput16 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features
#
#if [ $do17 = yes ] ; then
# echo $title17
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput17 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput17 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features (UTF-16/32 support)
#
#if [ $do18 = yes ] ; then
# echo $title18
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput18 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput18-$bits testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features (Unicode property support)
#
#if [ $do19 = yes ] ; then
# echo $title19
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# elif [ $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput19 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput19 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
#
#if [ $do20 = yes ] ; then
# echo $title20
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput20 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput20 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for reloads with 16/32-bit library
#
#if [ $do21 = yes ] ; then
@ -855,70 +776,6 @@ fi
# echo " OK"
# fi
#fi
#
#if [ $do23 = yes ] ; then
# echo $title23
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
# echo " Skipped when running 8/32-bit tests"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput23 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput23 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
#if [ $do24 = yes ] ; then
# echo $title24
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
# echo " Skipped when running 8/32-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput24 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput24 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
#if [ $do25 = yes ] ; then
# echo $title25
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
# echo " Skipped when running 8/16-bit tests"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput25 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput25 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
#if [ $do26 = yes ] ; then
# echo $title26
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
# echo " Skipped when running 8/16-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput26 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput26 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
# End of loop for 8/16/32-bit tests
done

View File

@ -286,9 +286,10 @@ This is a pattern line whose modifier list starts with two one-letter modifiers
.SH "PATTERN SYNTAX"
.rs
.sp
A pattern line must start with one of the following characters:
A pattern line must start with one of the following characters (common symbols,
excluding pattern meta-characters):
.sp
" / ! ' ` - + = : ; . ,
/ ! " ' ` - = _ : ; , % & @ ~
.sp
This is interpreted as the pattern's delimiter. A regular expression may be
continued over several input lines, in which case the newline characters are

View File

@ -7833,11 +7833,12 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
/* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
8-bit UTF mode, codepoints in the range 128-255 are introductory code
points and cannot have another case. In 16-bit and 32-bit mode, we can
points and cannot have another case. In 16-bit and 32-bit modes, we can
check wide characters when UTF (and therefore UCP) is supported. */
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
else if (UCD_OTHERCASE(firstcu) != firstcu)
else if (firstcu <= MAX_UTF_CODE_POINT &&
UCD_OTHERCASE(firstcu) != firstcu)
re->flags |= PCRE2_FIRSTCASELESS;
#endif
}
@ -7870,7 +7871,7 @@ if (reqcuflags >= 0 &&
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
}
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
else if (UCD_OTHERCASE(reqcu) != reqcu)
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
re->flags |= PCRE2_LASTCASELESS;
#endif
}

View File

@ -184,8 +184,8 @@ static const char match_error_texts[] =
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
/* 15 */
"UTF-8 error: code point > 0x10ffff is not defined\0"
"UTF-8 error: code points 0xd000-0xdfff are not defined\0"
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
"UTF-8 error: overlong 2-byte sequence\0"
"UTF-8 error: overlong 3-byte sequence\0"
"UTF-8 error: overlong 4-byte sequence\0"
@ -198,8 +198,8 @@ static const char match_error_texts[] =
/* 25 */
"UTF-16 error: invalid low surrogate\0"
"UTF-16 error: isolated low surrogate\0"
"UTF-32 error: surrogate character not allowed\0"
"UTF-32 error: code point > 0x10ffff is not defined\0"
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
"bad count value\0"
/* 30 */
"pattern compiled with other endianness\0"

View File

@ -240,6 +240,10 @@ Unicode doesn't go beyond 0x0010ffff. */
#define NOTACHAR 0xffffffff
/* This is the largest valid UTF/Unicode code point. */
#define MAX_UTF_CODE_POINT 0x10ffff
/* Compile-time errors are added to this value. As they are documented, it
should probably never be changed. */
@ -574,9 +578,6 @@ total length. */
#define tables_length (ctypes_offset + 256)
/* -------------------- Character and string names ------------------------ */
/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal

View File

@ -279,9 +279,8 @@ static void
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
{
PCRE2_SPTR codestart, nametable, code;
uint32_t options = re->compile_options;
uint32_t nesize = re->name_entry_size;
BOOL utf = (options & PCRE2_UTF) != 0;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
code = codestart = nametable + re->name_count * re->name_entry_size;

View File

@ -125,7 +125,7 @@ PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence

View File

@ -147,8 +147,6 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
const char *message, *addmessage;
size_t length, addlength;
errcode -= COMPILE_ERROR_BASE;
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
"unknown error code" : pstring[errcode];
length = strlen(message) + 1;
@ -237,8 +235,8 @@ if (preg->re_pcre2_code == NULL)
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
preg->re_nsub = (size_t)re_nsub;
preg->re_match_data = ((cflags & REG_NOSUB) != 0)? NULL :
pcre2_match_data_create(re_nsub + 1, NULL);
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
return 0;
}

View File

@ -497,7 +497,7 @@ static modstruct modlist[] = {
#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
#define POSIX_SUPPORTED_MATCH_CONTROLS ( 0 )
#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
/* Table of single-character abbreviated modifiers. The index field is
initialized to -1, but the first time the modifier is encountered, it is filled
@ -2884,7 +2884,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
((options & PCRE2_UTF) != 0)? " utf" : "",
((options & PCRE2_UCP) != 0)? " ucp" : "",
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
@ -3884,7 +3884,7 @@ static int
callout_function(pcre2_callout_block_8 *cb)
{
uint32_t i, pre_start, post_start, subject_length;
BOOL utf = (FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
FILE *f = (first_callout || callout_capture)? outfile : NULL;
@ -4033,8 +4033,10 @@ dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
/* Initialize for scanning the data line. */
utf = (pat_patctl.control & CTL_POSIX) == 0 &&
(FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
start_rep = NULL;
len = strlen((const char *)buffer);
while (len > 0 && isspace(buffer[len-1])) len--;
@ -4043,7 +4045,7 @@ p = buffer;
while (isspace(*p)) p++;
/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
invalid input to pcre2_exec, you must use \x?? or \x{} sequences. */
invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
if (utf)
{
@ -4414,14 +4416,14 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
{
fprintf(outfile, "%2d: ", (int)i);
PCHARSV(dbuffer, pmatch[i].rm_so,
pmatch[i].rm_eo - pmatch[i].rm_so, FALSE, outfile);
pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile);
fprintf(outfile, "\n");
if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
(dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
{
fprintf(outfile, "%2d+ ", (int)i);
PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
FALSE, outfile);
utf, outfile);
fprintf(outfile, "\n");
}
}
@ -5587,7 +5589,7 @@ while (notdone)
rc = process_command();
}
else if (strchr("\"/!'`-+=:;.,", *p) != NULL)
else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
{
rc = process_pattern();
dfa_matched = 0;

398
testdata/testinput10 vendored Normal file
View File

@ -0,0 +1,398 @@
# This set of tests is for UTF-8 support and Unicode property support, with
# relevance only for the 8-bit library.
/X(\C{3})/utf
X\x{1234}
/X(\C{4})/utf
X\x{1234}YZ
/X\C*/utf
XYZabcdce
/X\C*?/utf
XYZabcde
/X\C{3,5}/utf
Xabcdefg
X\x{1234}
X\x{1234}YZ
X\x{1234}\x{512}
X\x{1234}\x{512}YZ
/X\C{3,5}?/utf
Xabcdefg
X\x{1234}
X\x{1234}YZ
X\x{1234}\x{512}
/a\Cb/utf
aXb
a\nb
/a\C\Cb/utf
a\x{100}b
/ab\Cde/utf
abXde
/a\C\Cb/utf
a\x{100}b
** Failers
a\x{12257}b
/[Ã]/utf
/Ã/utf
/ÃÃÃxxx/utf
/badutf/utf
\xdf
\xef
\xef\x80
\xf7
\xf7\x80
\xf7\x80\x80
\xfb
\xfb\x80
\xfb\x80\x80
\xfb\x80\x80\x80
\xfd
\xfd\x80
\xfd\x80\x80
\xfd\x80\x80\x80
\xfd\x80\x80\x80\x80
\xdf\x7f
\xef\x7f\x80
\xef\x80\x7f
\xf7\x7f\x80\x80
\xf7\x80\x7f\x80
\xf7\x80\x80\x7f
\xfb\x7f\x80\x80\x80
\xfb\x80\x7f\x80\x80
\xfb\x80\x80\x7f\x80
\xfb\x80\x80\x80\x7f
\xfd\x7f\x80\x80\x80\x80
\xfd\x80\x7f\x80\x80\x80
\xfd\x80\x80\x7f\x80\x80
\xfd\x80\x80\x80\x7f\x80
\xfd\x80\x80\x80\x80\x7f
\xed\xa0\x80
\xc0\x8f
\xe0\x80\x8f
\xf0\x80\x80\x8f
\xf8\x80\x80\x80\x8f
\xfc\x80\x80\x80\x80\x8f
\x80
\xfe
\xff
/badutf/utf
\xfb\x80\x80\x80\x80
\xfd\x80\x80\x80\x80\x80
\xf7\xbf\xbf\xbf
/shortutf/utf
\xdf\=ph
\xef\=ph
\xef\x80\=ph
\xf7\=ph
\xf7\x80\=ph
\xf7\x80\x80\=ph
\xfb\=ph
\xfb\x80\=ph
\xfb\x80\x80\=ph
\xfb\x80\x80\x80\=ph
\xfd\=ph
\xfd\x80\=ph
\xfd\x80\x80\=ph
\xfd\x80\x80\x80\=ph
\xfd\x80\x80\x80\x80\=ph
/anything/utf
\xc0\x80
\xc1\x8f
\xe0\x9f\x80
\xf0\x8f\x80\x80
\xf8\x87\x80\x80\x80
\xfc\x83\x80\x80\x80\x80
\xfe\x80\x80\x80\x80\x80
\xff\x80\x80\x80\x80\x80
\xc3\x8f
\xe0\xaf\x80
\xe1\x80\x80
\xf0\x9f\x80\x80
\xf1\x8f\x80\x80
\xf8\x88\x80\x80\x80
\xf9\x87\x80\x80\x80
\xfc\x84\x80\x80\x80\x80
\xfd\x83\x80\x80\x80\x80
\xf8\x88\x80\x80\x80\=no_utf_check
\xf9\x87\x80\x80\x80\=no_utf_check
\xfc\x84\x80\x80\x80\x80\=no_utf_check
\xfd\x83\x80\x80\x80\x80\=no_utf_check
/\x{100}/IB,utf
/\x{1000}/IB,utf
/\x{10000}/IB,utf
/\x{100000}/IB,utf
/\x{10ffff}/IB,utf
/[\x{ff}]/IB,utf
/[\x{100}]/IB,utf
/\x80/IB,utf
/\xff/IB,utf
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
\x{D55c}\x{ad6d}\x{C5B4}
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
\x{65e5}\x{672c}\x{8a9e}
/\x{80}/IB,utf
/\x{084}/IB,utf
/\x{104}/IB,utf
/\x{861}/IB,utf
/\x{212ab}/IB,utf
# This one is here not because it's different to Perl, but because the way
# the captured single-byte is displayed. (In Perl it becomes a character, and you
# can't tell the difference.)
/X(\C)(.*)/utf
X\x{1234}
X\nabc
# This one is here because Perl gives out a grumbly error message (quite
# correctly, but that messes up comparisons).
/a\Cb/utf
*** Failers
a\x{100}b
/[^ab\xC0-\xF0]/IB,utf
\x{f1}
\x{bf}
\x{100}
\x{1000}
*** Failers
\x{c0}
\x{f0}
/Ä€{3,4}/IB,utf
\x{100}\x{100}\x{100}\x{100\x{100}
/(\x{100}+|x)/IB,utf
/(\x{100}*a|x)/IB,utf
/(\x{100}{0,2}a|x)/IB,utf
/(\x{100}{1,2}a|x)/IB,utf
/\x{100}/IB,utf
/a\x{100}\x{101}*/IB,utf
/a\x{100}\x{101}+/IB,utf
/[^\x{c4}]/IB
/[\x{100}]/IB,utf
\x{100}
Z\x{100}
\x{100}Z
*** Failers
/[\xff]/IB,utf
>\x{ff}<
/[^\xff]/IB,utf
/\x{100}abc(xyz(?1))/IB,utf
/a\x{1234}b/utf,posix
a\x{1234}b
/\777/I,utf
\x{1ff}
\777
/\x{100}+\x{200}/IB,utf
/\x{100}+X/IB,utf
/^[\QÄ€\E-\QÅ<51>\E/B,utf
# This tests the stricter UTF-8 check according to RFC 3629.
/X/utf
\x{d800}
\x{d800}\=no_utf_check
\x{da00}
\x{da00}\=no_utf_check
\x{dfff}
\x{dfff}\=no_utf_check
\x{110000}
\x{110000}\=no_utf_check
\x{2000000}
\x{2000000}\=no_utf_check
\x{7fffffff}
\x{7fffffff}\=no_utf_check
/(*UTF8)\x{1234}/
abcd\x{1234}pqr
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
/\h/I,utf
ABC\x{09}
ABC\x{20}
ABC\x{a0}
ABC\x{1680}
ABC\x{180e}
ABC\x{2000}
ABC\x{202f}
ABC\x{205f}
ABC\x{3000}
/\v/I,utf
ABC\x{0a}
ABC\x{0b}
ABC\x{0c}
ABC\x{0d}
ABC\x{85}
ABC\x{2028}
/\h*A/I,utf
CDBABC
/\v+A/I,utf
/\s?xxx\s/I,utf
/\sxxx\s/I,utf,tables=2
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\S \S/I,utf,tables=2
\x{a2} \x{84}
A Z
/a+/utf
a\x{123}aa\=offset=1
a\x{123}aa\=offset=2
a\x{123}aa\=offset=3
a\x{123}aa\=offset=4
a\x{123}aa\=offset=5
a\x{123}aa\=offset=6
/\x{1234}+/Ii,utf
/\x{1234}+?/Ii,utf
/\x{1234}++/Ii,utf
/\x{1234}{2}/Ii,utf
/[^\x{c4}]/IB,utf
/X+\x{200}/IB,utf
/\R/I,utf
/\777/IB,utf
/\w+\x{C4}/B,utf
a\x{C4}\x{C4}
/\w+\x{C4}/B,utf,tables=2
a\x{C4}\x{C4}
/\W+\x{C4}/B,utf
!\x{C4}
/\W+\x{C4}/B,utf,tables=2
!\x{C4}
/\W+\x{A1}/B,utf
!\x{A1}
/\W+\x{A1}/B,utf,tables=2
!\x{A1}
/X\s+\x{A0}/B,utf
X\x20\x{A0}\x{A0}
/X\s+\x{A0}/B,utf,tables=2
X\x20\x{A0}\x{A0}
/\S+\x{A0}/B,utf
X\x{A0}\x{A0}
/\S+\x{A0}/B,utf,tables=2
X\x{A0}\x{A0}
/\x{a0}+\s!/B,utf
\x{a0}\x20!
/\x{a0}+\s!/B,utf,tables=2
\x{a0}\x20!
/A/utf
\x{ff000041}
\x{7f000041}
/(*UTF8)abc/never_utf
/abc/utf,never_utf
/\w/posix
+++\x{c2}
/\w/ucp,posix
+++\x{c2}
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
/AB\x{1fb0}/IB,utf
/AB\x{1fb0}/IBi,utf
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
/[â±¥]/Bi,utf
/[^â±¥]/Bi,utf
/\h/I
/\v/I
/\R/I
/[[:blank:]]/B,ucp
/\x{212a}+/Ii,utf
KKkk\x{212a}
/s+/Ii,utf
SSss\x{17f}
# End of testinput10

357
testdata/testinput11 vendored Normal file
View File

@ -0,0 +1,357 @@
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
# features that are not compatible with the 8-bit library, or which give
# different output in 16-bit or 32-bit mode. The output for the two widths is
# different, so they have separate output files.
#forbid_utf
/a\Cb/
aXb
a\nb
/[^\x{c4}]/IB
/\x{100}/I
/ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional leading comment
(?: (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
\(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) | # comments, or...
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
# quoted strings
)*
< (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # leading <
(?: @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* , (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
)* # further okay, if led by comma
: # closing colon
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* )? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address spec
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* > # trailing >
# name and address
) (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
/[\h]/B
>\x09<
/[\h]+/B
>\x09\x20\xa0<
/[\v]/B
/[^\h]/B
/\h+/I
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
/[\h\x{dc00}]+/IB
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
/\H+/I
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
/[\H\x{d800}]+/
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
/\v+/I
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
/[\v\x{dc00}]+/IB
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
/\V+/I
\x{2028}\x{2029}\x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
/[\V\x{d800}]+/
\x{2028}\x{2029}\x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
/\R+/I,bsr=unicode
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
XX
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
XX
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
/^\x{ffff}+/i
\x{ffff}
/^\x{ffff}?/i
\x{ffff}
/^\x{ffff}*/i
\x{ffff}
/^\x{ffff}{3}/i
\x{ffff}\x{ffff}\x{ffff}
/^\x{ffff}{0,3}/i
\x{ffff}
/[^\x00-a]{12,}[^b-\xff]*/B
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
/^[\x{1234}\x{4321}]{2,4}?/
\x{1234}\x{1234}\x{1234}
# Check maximum non-UTF character size for the 16-bit library.
/\x{ffff}/
A\x{ffff}B
/\x{10000}/
/\o{20000}/
# Check maximum character size for the 32-bit library. These will all give
# errors in the 16-bit library.
/\x{110000}/
/\x{7fffffff}/
/\x{80000000}/
/\x{ffffffff}/
/\x{100000000}/
/\o{17777777777}/
/\o{20000000000}/
/\o{37777777777}/
/\o{40000000000}/
/\x{7fffffff}\x{7fffffff}/I
/\x{80000000}\x{80000000}/I
/\x{ffffffff}\x{ffffffff}/I
# Non-UTF characters
/\C{2,3}/
\x{400000}\x{400001}\x{400002}\x{400003}
/\x{400000}\x{800000}/IBi
# Check character ranges
/[\H]/IB
/[\V]/IB
# End of testinput11

332
testdata/testinput12 vendored Normal file
View File

@ -0,0 +1,332 @@
# This set of tests is for UTF-16 and UTF-32 support, and is relevant only to
# the 16-bit and 32-bit libraries. The output is different for each library,
# so there are separate output files.
/ÃÃÃxxx/IB,utf,no_utf_check
/abc/utf
Ã]
/X(\C{3})/utf
X\x{11234}Y
X\x{11234}YZ
/X(\C{4})/utf
X\x{11234}YZ
X\x{11234}YZW
/X\C*/utf
XYZabcdce
/X\C*?/utf
XYZabcde
/X\C{3,5}/utf
Xabcdefg
X\x{11234}Y
X\x{11234}YZ
X\x{11234}\x{512}
X\x{11234}\x{512}YZ
X\x{11234}\x{512}\x{11234}Z
/X\C{3,5}?/utf
Xabcdefg
X\x{11234}Y
X\x{11234}YZ
X\x{11234}\x{512}YZ
*** Failers
X\x{11234}
/a\Cb/utf
aXb
a\nb
/a\C\Cb/utf
a\x{12257}b
a\x{12257}\x{11234}b
** Failers
a\x{100}b
/ab\Cde/utf
abXde
# Check maximum character size
/\x{ffff}/IB,utf
/\x{10000}/IB,utf
/\x{100}/IB,utf
/\x{1000}/IB,utf
/\x{10000}/IB,utf
/\x{100000}/IB,utf
/\x{10ffff}/IB,utf
/[\x{ff}]/IB,utf
/[\x{100}]/IB,utf
/\x80/IB,utf
/\xff/IB,utf
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
\x{D55c}\x{ad6d}\x{C5B4}
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
\x{65e5}\x{672c}\x{8a9e}
/\x{80}/IB,utf
/\x{084}/IB,utf
/\x{104}/IB,utf
/\x{861}/IB,utf
/\x{212ab}/IB,utf
# This one is here not because it's different to Perl, but because the way
# the captured single-byte is displayed. (In Perl it becomes a character, and you
# can't tell the difference.)
/X(\C)(.*)/utf
X\x{1234}
X\nabc
# This one is here because Perl gives out a grumbly error message (quite
# correctly, but that messes up comparisons).
/a\Cb/utf
*** Failers
a\x{100}b
/[^ab\xC0-\xF0]/IB,utf
\x{f1}
\x{bf}
\x{100}
\x{1000}
*** Failers
\x{c0}
\x{f0}
/Ā{3,4}/IB,utf
\x{100}\x{100}\x{100}\x{100\x{100}
/(\x{100}+|x)/IB,utf
/(\x{100}*a|x)/IB,utf
/(\x{100}{0,2}a|x)/IB,utf
/(\x{100}{1,2}a|x)/IB,utf
/\x{100}/IB,utf
/a\x{100}\x{101}*/IB,utf
/a\x{100}\x{101}+/IB,utf
/[^\x{c4}]/IB
/[\x{100}]/IB,utf
\x{100}
Z\x{100}
\x{100}Z
*** Failers
/[\xff]/IB,utf
>\x{ff}<
/[^\xff]/IB,utf
/\x{100}abc(xyz(?1))/IB,utf
/\777/I,utf
\x{1ff}
\777
/\x{100}+\x{200}/IB,utf
/\x{100}+X/IB,utf
/^[\QĀ\E-\QŐ\E/B,utf
/X/utf
\x{d800}
\x{d800}\=no_utf_check
\x{da00}
\x{da00}\=no_utf_check
\x{dc00}
\x{dc00}\=no_utf_check
\x{de00}
\x{de00}\=no_utf_check
\x{dfff}
\x{dfff}\=no_utf_check
\x{110000}
\x{d800}\x{1234}
/(*UTF16)\x{11234}/
abcd\x{11234}pqr
/(*UTF)\x{11234}/I
abcd\x{11234}pqr
/(*UTF-32)\x{11234}/
abcd\x{11234}pqr
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
/\h/I,utf
ABC\x{09}
ABC\x{20}
ABC\x{a0}
ABC\x{1680}
ABC\x{180e}
ABC\x{2000}
ABC\x{202f}
ABC\x{205f}
ABC\x{3000}
/\v/I,utf
ABC\x{0a}
ABC\x{0b}
ABC\x{0c}
ABC\x{0d}
ABC\x{85}
ABC\x{2028}
/\h*A/I,utf
CDBABC
\x{2000}ABC
/\R*A/I,bsr=unicode,utf
CDBABC
\x{2028}A
/\v+A/I,utf
/\s?xxx\s/I,utf
/\sxxx\s/I,utf,tables=2
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\S \S/I,utf,tables=2
\x{a2} \x{84}
A Z
/a+/utf
a\x{123}aa\=offset=1
a\x{123}aa\=offset=2
a\x{123}aa\=offset=3
a\x{123}aa\=offset=4
a\x{123}aa\=offset=5
a\x{123}aa\=offset=6
/\x{1234}+/Ii,utf
/\x{1234}+?/Ii,utf
/\x{1234}++/Ii,utf
/\x{1234}{2}/Ii,utf
/[^\x{c4}]/IB,utf
/X+\x{200}/IB,utf
/\R/I,utf
# Check bad offset
/a/utf
\x{10000}\=offset=1
\x{10000}ab\=offset=1
\x{10000}ab\=offset=2
\x{10000}ab\=offset=3
\x{10000}ab\=offset=4
\x{10000}ab\=offset=5
/í¼€/utf
/\w+\x{C4}/B,utf
a\x{C4}\x{C4}
/\w+\x{C4}/B,utf,tables=2
a\x{C4}\x{C4}
/\W+\x{C4}/B,utf
!\x{C4}
/\W+\x{C4}/B,utf,tables=2
!\x{C4}
/\W+\x{A1}/B,utf
!\x{A1}
/\W+\x{A1}/B,utf,tables=2
!\x{A1}
/X\s+\x{A0}/B,utf
X\x20\x{A0}\x{A0}
/X\s+\x{A0}/B,utf,tables=2
X\x20\x{A0}\x{A0}
/\S+\x{A0}/B,utf
X\x{A0}\x{A0}
/\S+\x{A0}/B,utf,tables=2
X\x{A0}\x{A0}
/\x{a0}+\s!/B,utf
\x{a0}\x20!
/\x{a0}+\s!/B,utf,tables=2
\x{a0}\x20!
/(*UTF)abc/never_utf
/abc/utf,never_utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
/AB\x{1fb0}/IB,utf
/AB\x{1fb0}/IBi,utf
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
/[ⱥ]/Bi,utf
/[^ⱥ]/Bi,utf
/[[:blank:]]/B,ucp
/\x{212a}+/Ii,utf
KKkk\x{212a}
/s+/Ii,utf
SSss\x{17f}
# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
/\x{110000}/utf
/\o{4200000}/utf
/\C/utf
\x{110000}
# End of testinput12

22
testdata/testinput13 vendored Normal file
View File

@ -0,0 +1,22 @@
# These DFA tests are for the handling of characters greater than 255 in
# 16-bit or 32-bit, non-UTF mode.
#forbid_utf
#subject dfa
/^\x{ffff}+/i
\x{ffff}
/^\x{ffff}?/i
\x{ffff}
/^\x{ffff}*/i
\x{ffff}
/^\x{ffff}{3}/i
\x{ffff}\x{ffff}\x{ffff}
/^\x{ffff}{0,3}/i
\x{ffff}
# End of testinput13

2
testdata/testinput2 vendored
View File

@ -1593,7 +1593,7 @@ a random value. /Ix
abc\rdef
abc\r\ndef
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
/* this is a C style comment */\=find_limits
/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I

4
testdata/testinput5 vendored
View File

@ -573,11 +573,11 @@
/X\W{3}X/utf
X\=ps
/\sxxx\s/utf,tables=1
/\sxxx\s/utf,tables=2
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\S \S/utf,tables=1
/\S \S/utf,tables=2
\x{a2} \x{84}
'A#хц'Bx,newline=any,utf

141
testdata/testinput8 vendored Normal file
View File

@ -0,0 +1,141 @@
# These are a few representative patterns whose lengths and offsets are to be
# shown when the link size is 2. This is just a doublecheck test to ensure the
# sizes don't go horribly wrong when something is changed. The pattern contents
# are all themselves checked in other tests. Unicode, including property
# support, is required for these tests.
#pattern fullbincode,memory
/((?i)b)/
/(?s)(.*X|^B)/
/(?s:.*X|^B)/
/^[[:alnum:]]/
/#/Ix
/a#/Ix
/x?+/
/x++/
/x{1,3}+/
/(x)*+/
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
/(a(?1)b)/
/(a(?1)+b)/
/a(?P<name1>b|c)d(?P<longername2>e)/
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
/(?P<a>a)...(?P=a)bbb(?P>a)d/
/abc(?C255)de(?C)f/
/abcde/auto_callout
/\x{100}/utf
/\x{1000}/utf
/\x{10000}/utf
/\x{100000}/utf
/\x{10ffff}/utf
/\x{110000}/utf
/[\x{ff}]/utf
/[\x{100}]/utf
/\x80/utf
/\xff/utf
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
/\x{65e5}\x{672c}\x{8a9e}/I,utf
/[\x{100}]/utf
/[Z\x{100}]/utf
/^[\x{100}\E-\Q\E\x{150}]/utf
/^[\QĀ\E-\QŐ\E]/utf
/^[\QĀ\E-\QŐ\E/utf
/[\p{L}]/
/[\p{^L}]/
/[\P{L}]/
/[\P{^L}]/
/[abc\p{L}\x{0660}]/utf
/[\p{Nd}]/utf
/[\p{Nd}+-]+/utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
/[\x{105}-\x{109}]/i,utf
/( ( (?(1)0|) )* )/x
/( (?(1)0|)* )/x
/[a]/
/[a]/utf
/[\xaa]/
/[\xaa]/utf
/[^a]/
/[^a]/utf
/[^\xaa]/
/[^\xaa]/utf
#pattern -memory
/[^\d]/utf,ucp
/[[:^alpha:][:^cntrl:]]+/utf,ucp
/[[:^cntrl:][:^alpha:]]+/utf,ucp
/[[:alpha:]]+/utf,ucp
/[[:^alpha:]\S]+/utf,ucp
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
/(((a\2)|(a*)\g<-1>))*a?/
# End of testinput8

333
testdata/testinput9 vendored Normal file
View File

@ -0,0 +1,333 @@
# This set of tests is run only with the 8-bit library. They do not require
# UTF-8 or Unicode property support. The file starts with all the tests of
# the POSIX interface, because that is supported only with the 8-bit library.
#forbid_utf
#pattern posix
/abc/
abc
*** Failers
/^abc|def/
abcdef
abcdef\=notbol
/.*((abc)$|(def))/
defabc
defabc\=noteol
/the quick brown fox/
the quick brown fox
*** Failers
The Quick Brown Fox
/the quick brown fox/i
the quick brown fox
The Quick Brown Fox
/abc.def/
*** Failers
abc\ndef
/abc$/
abc
abc\n
/(abc)\2/
/(abc\1)/
abc
/a*(b+)(z)(z)/
aaaabbbbzzzz
aaaabbbbzzzz\=ovector=0
aaaabbbbzzzz\=ovector=1
aaaabbbbzzzz\=ovector=2
/ab.cd/
ab-cd
ab=cd
** Failers
ab\ncd
/ab.cd/s
ab-cd
ab=cd
ab\ncd
/a(b)c/no_auto_capture
abc
/a(?P<name>b)c/no_auto_capture
abc
/a?|b?/
abc
** Failers
ddd\=notempty
/\w+A/
CDAAAAB
/\w+A/ungreedy
CDAAAAB
/\Biss\B/I,aftertext
Mississippi
/abc/\
#pattern -posix
# End of POSIX tests
/a\Cb/
aXb
a\nb
** Failers (too big char)
A\x{123}B
A\o{443}B
/\x{100}/I
/\o{400}/I
/ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional leading comment
(?: (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
\(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) | # comments, or...
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
# quoted strings
)*
< (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # leading <
(?: @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* , (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
)* # further okay, if led by comma
: # closing colon
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* )? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address spec
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* > # trailing >
# name and address
) (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
/\h/I
/\H/I
/\v/I
/\V/I
/\R/I
/[\h]/B
>\x09<
/[\h]+/B
>\x09\x20\xa0<
/[\v]/B
/[\H]/B
/[^\h]/B
/[\V]/B
/[\x0a\V]/B
/\777/I
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
XX
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
XX
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
/[^\x00-a]{12,}[^b-\xff]*/B
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
# End of testinput9

1273
testdata/testoutput10 vendored Normal file

File diff suppressed because it is too large Load Diff

675
testdata/testoutput11-16 vendored Normal file
View File

@ -0,0 +1,675 @@
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
# features that are not compatible with the 8-bit library, or which give
# different output in 16-bit or 32-bit mode. The output for the two widths is
# different, so they have separate output files.
#forbid_utf
/a\Cb/
aXb
0: aXb
a\nb
0: a\x0ab
/[^\x{c4}]/IB
------------------------------------------------------------------
Bra
[^\x{c4}]
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
/\x{100}/I
Capturing subpattern count = 0
No options
First code unit = \x{100}
No last code unit
Subject length lower bound = 1
/ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional leading comment
(?: (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
\(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) | # comments, or...
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
# quoted strings
)*
< (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # leading <
(?: @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* , (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
)* # further okay, if led by comma
: # closing colon
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* )? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address spec
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* > # trailing >
# name and address
) (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
Capturing subpattern count = 0
Contains explicit CR or LF match
Options: extended
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
No last code unit
Subject length lower bound = 3
/[\h]/B
------------------------------------------------------------------
Bra
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
Ket
End
------------------------------------------------------------------
>\x09<
0: \x09
/[\h]+/B
------------------------------------------------------------------
Bra
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
Ket
End
------------------------------------------------------------------
>\x09\x20\xa0<
0: \x09 \xa0
/[\v]/B
------------------------------------------------------------------
Bra
[\x0a-\x0d\x85\x{2028}-\x{2029}]
Ket
End
------------------------------------------------------------------
/[^\h]/B
------------------------------------------------------------------
Bra
[^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
Ket
End
------------------------------------------------------------------
/\h+/I
Capturing subpattern count = 0
No options
Starting code units: \x09 \x20 \xa0 \xff
No last code unit
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
0: \x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
0: \x{200a}\xa0\x{2000}
/[\h\x{dc00}]+/IB
------------------------------------------------------------------
Bra
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x09 \x20 \xa0 \xff
No last code unit
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
0: \x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
0: \x{200a}\xa0\x{2000}
/\H+/I
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
0: \x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
0: \x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
0: \x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
0: \x9f\xa1\x{2fff}\x{3001}
/[\H\x{d800}]+/
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
0: \x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
0: \x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
0: \x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
0: \x9f\xa1\x{2fff}\x{3001}
/\v+/I
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
No last code unit
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
0: \x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
0: \x85\x0a\x0b\x0c\x0d
/[\v\x{dc00}]+/IB
------------------------------------------------------------------
Bra
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
No last code unit
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
0: \x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
0: \x85\x0a\x0b\x0c\x0d
/\V+/I
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
\x{2028}\x{2029}\x{2027}\x{2030}
0: \x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
0: \x09\x0e\x84\x86
/[\V\x{d800}]+/
\x{2028}\x{2029}\x{2027}\x{2030}
0: \x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
0: \x09\x0e\x84\x86
/\R+/I,bsr=unicode
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
No last code unit
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
0: \x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
0: \x85\x0a\x0b\x0c\x0d
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
Capturing subpattern count = 0
No options
First code unit = \x{d800}
Last code unit = \x{dd00}
Subject length lower bound = 6
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
------------------------------------------------------------------
Bra
[^\x{80}]
[^\x{ff}]
[^\x{100}]
[^\x{1000}]
[^\x{ffff}]
Ket
End
------------------------------------------------------------------
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
------------------------------------------------------------------
Bra
/i [^\x{80}]
/i [^\x{ff}]
/i [^\x{100}]
/i [^\x{1000}]
/i [^\x{ffff}]
Ket
End
------------------------------------------------------------------
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
------------------------------------------------------------------
Bra
[^\x{100}]*
[^\x{1000}]+
[^\x{ffff}]??
[^\x{8000}]{4}
[^\x{8000}]*
[^\x{7fff}]{2}
[^\x{7fff}]{0,7}?
[^\x{100}]{5}
[^\x{100}]?+
Ket
End
------------------------------------------------------------------
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
------------------------------------------------------------------
Bra
/i [^\x{100}]*
/i [^\x{1000}]+
/i [^\x{ffff}]??
/i [^\x{8000}]{4}
/i [^\x{8000}]*
/i [^\x{7fff}]{2}
/i [^\x{7fff}]{0,7}?
/i [^\x{100}]{5}
/i [^\x{100}]?+
Ket
End
------------------------------------------------------------------
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
XX
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
XX
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
------------------------------------------------------------------
Bra
\x{100}
Ket
End
------------------------------------------------------------------
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
------------------------------------------------------------------
Bra
[\x{100}-\x{200}]
Ket
End
------------------------------------------------------------------
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
------------------------------------------------------------------
Bra
\x{d800}
Ket
End
------------------------------------------------------------------
/^\x{ffff}+/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}?/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}*/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}{3}/i
\x{ffff}\x{ffff}\x{ffff}
0: \x{ffff}\x{ffff}\x{ffff}
/^\x{ffff}{0,3}/i
\x{ffff}
0: \x{ffff}
/[^\x00-a]{12,}[^b-\xff]*/B
------------------------------------------------------------------
Bra
[b-\xff] (neg){12,}
[\x00-a] (neg)*+
Ket
End
------------------------------------------------------------------
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
------------------------------------------------------------------
Bra
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
\s*
[0-9A-Z_a-z]++
\W+
[\x00-/:-\xff] (neg)*?
\d
0
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
\w*
A
Ket
End
------------------------------------------------------------------
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
------------------------------------------------------------------
Bra
a*
[b-\xff\x{100}-\x{200}]?+
a#
a*+
[b-\xff\x{100}-\x{200}]?
b#
[a-f]*+
[g-\xff\x{100}-\x{200}]*+
#
[g-\xff\x{100}-\x{200}]*+
[a-c]*+
#
[g-\xff\x{100}-\x{200}]*
[a-h]*+
Ket
End
------------------------------------------------------------------
/^[\x{1234}\x{4321}]{2,4}?/
\x{1234}\x{1234}\x{1234}
0: \x{1234}\x{1234}
# Check maximum non-UTF character size for the 16-bit library.
/\x{ffff}/
A\x{ffff}B
0: \x{ffff}
/\x{10000}/
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
/\o{20000}/
# Check maximum character size for the 32-bit library. These will all give
# errors in the 16-bit library.
/\x{110000}/
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
/\x{7fffffff}/
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
/\x{80000000}/
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
/\x{ffffffff}/
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
/\x{100000000}/
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
/\o{17777777777}/
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\o{20000000000}/
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\o{37777777777}/
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\o{40000000000}/
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\x{7fffffff}\x{7fffffff}/I
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
/\x{80000000}\x{80000000}/I
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
/\x{ffffffff}\x{ffffffff}/I
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
# Non-UTF characters
/\C{2,3}/
\x{400000}\x{400001}\x{400002}\x{400003}
** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
** Character \x{400001} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
** Character \x{400002} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
** Character \x{400003} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
0: \x00\x01\x02
/\x{400000}\x{800000}/IBi
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
# Check character ranges
/[\H]/IB
------------------------------------------------------------------
Bra
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
\x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
\xfa \xfb \xfc \xfd \xfe \xff
No last code unit
Subject length lower bound = 1
/[\V]/IB
------------------------------------------------------------------
Bra
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
\x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
\x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
\xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
\xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
\xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
\xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
\xfc \xfd \xfe \xff
No last code unit
Subject length lower bound = 1
# End of testinput11

681
testdata/testoutput11-32 vendored Normal file
View File

@ -0,0 +1,681 @@
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
# features that are not compatible with the 8-bit library, or which give
# different output in 16-bit or 32-bit mode. The output for the two widths is
# different, so they have separate output files.
#forbid_utf
/a\Cb/
aXb
0: aXb
a\nb
0: a\x0ab
/[^\x{c4}]/IB
------------------------------------------------------------------
Bra
[^\x{c4}]
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
/\x{100}/I
Capturing subpattern count = 0
No options
First code unit = \x{100}
No last code unit
Subject length lower bound = 1
/ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional leading comment
(?: (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
\(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) | # comments, or...
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
# quoted strings
)*
< (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # leading <
(?: @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* , (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
)* # further okay, if led by comma
: # closing colon
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* )? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address spec
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* > # trailing >
# name and address
) (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
Capturing subpattern count = 0
Contains explicit CR or LF match
Options: extended
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
No last code unit
Subject length lower bound = 3
/[\h]/B
------------------------------------------------------------------
Bra
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
Ket
End
------------------------------------------------------------------
>\x09<
0: \x09
/[\h]+/B
------------------------------------------------------------------
Bra
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
Ket
End
------------------------------------------------------------------
>\x09\x20\xa0<
0: \x09 \xa0
/[\v]/B
------------------------------------------------------------------
Bra
[\x0a-\x0d\x85\x{2028}-\x{2029}]
Ket
End
------------------------------------------------------------------
/[^\h]/B
------------------------------------------------------------------
Bra
[^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
Ket
End
------------------------------------------------------------------
/\h+/I
Capturing subpattern count = 0
No options
Starting code units: \x09 \x20 \xa0 \xff
No last code unit
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
0: \x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
0: \x{200a}\xa0\x{2000}
/[\h\x{dc00}]+/IB
------------------------------------------------------------------
Bra
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x09 \x20 \xa0 \xff
No last code unit
Subject length lower bound = 1
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
0: \x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
0: \x{200a}\xa0\x{2000}
/\H+/I
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
0: \x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
0: \x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
0: \x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
0: \x9f\xa1\x{2fff}\x{3001}
/[\H\x{d800}]+/
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
0: \x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
0: \x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
0: \x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
0: \x9f\xa1\x{2fff}\x{3001}
/\v+/I
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
No last code unit
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
0: \x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
0: \x85\x0a\x0b\x0c\x0d
/[\v\x{dc00}]+/IB
------------------------------------------------------------------
Bra
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
No last code unit
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
0: \x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
0: \x85\x0a\x0b\x0c\x0d
/\V+/I
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
\x{2028}\x{2029}\x{2027}\x{2030}
0: \x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
0: \x09\x0e\x84\x86
/[\V\x{d800}]+/
\x{2028}\x{2029}\x{2027}\x{2030}
0: \x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
0: \x09\x0e\x84\x86
/\R+/I,bsr=unicode
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
No last code unit
Subject length lower bound = 1
\x{2027}\x{2030}\x{2028}\x{2029}
0: \x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
0: \x85\x0a\x0b\x0c\x0d
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
Capturing subpattern count = 0
No options
First code unit = \x{d800}
Last code unit = \x{dd00}
Subject length lower bound = 6
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
------------------------------------------------------------------
Bra
[^\x{80}]
[^\x{ff}]
[^\x{100}]
[^\x{1000}]
[^\x{ffff}]
Ket
End
------------------------------------------------------------------
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
------------------------------------------------------------------
Bra
/i [^\x{80}]
/i [^\x{ff}]
/i [^\x{100}]
/i [^\x{1000}]
/i [^\x{ffff}]
Ket
End
------------------------------------------------------------------
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
------------------------------------------------------------------
Bra
[^\x{100}]*
[^\x{1000}]+
[^\x{ffff}]??
[^\x{8000}]{4}
[^\x{8000}]*
[^\x{7fff}]{2}
[^\x{7fff}]{0,7}?
[^\x{100}]{5}
[^\x{100}]?+
Ket
End
------------------------------------------------------------------
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
------------------------------------------------------------------
Bra
/i [^\x{100}]*
/i [^\x{1000}]+
/i [^\x{ffff}]??
/i [^\x{8000}]{4}
/i [^\x{8000}]*
/i [^\x{7fff}]{2}
/i [^\x{7fff}]{0,7}?
/i [^\x{100}]{5}
/i [^\x{100}]?+
Ket
End
------------------------------------------------------------------
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
XX
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
XX
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
------------------------------------------------------------------
Bra
\x{100}
Ket
End
------------------------------------------------------------------
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
------------------------------------------------------------------
Bra
[\x{100}-\x{200}]
Ket
End
------------------------------------------------------------------
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
------------------------------------------------------------------
Bra
\x{d800}
Ket
End
------------------------------------------------------------------
/^\x{ffff}+/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}?/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}*/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}{3}/i
\x{ffff}\x{ffff}\x{ffff}
0: \x{ffff}\x{ffff}\x{ffff}
/^\x{ffff}{0,3}/i
\x{ffff}
0: \x{ffff}
/[^\x00-a]{12,}[^b-\xff]*/B
------------------------------------------------------------------
Bra
[b-\xff] (neg){12,}
[\x00-a] (neg)*+
Ket
End
------------------------------------------------------------------
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
------------------------------------------------------------------
Bra
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
\s*
[0-9A-Z_a-z]++
\W+
[\x00-/:-\xff] (neg)*?
\d
0
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
\w*
A
Ket
End
------------------------------------------------------------------
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
------------------------------------------------------------------
Bra
a*
[b-\xff\x{100}-\x{200}]?+
a#
a*+
[b-\xff\x{100}-\x{200}]?
b#
[a-f]*+
[g-\xff\x{100}-\x{200}]*+
#
[g-\xff\x{100}-\x{200}]*+
[a-c]*+
#
[g-\xff\x{100}-\x{200}]*
[a-h]*+
Ket
End
------------------------------------------------------------------
/^[\x{1234}\x{4321}]{2,4}?/
\x{1234}\x{1234}\x{1234}
0: \x{1234}\x{1234}
# Check maximum non-UTF character size for the 16-bit library.
/\x{ffff}/
A\x{ffff}B
0: \x{ffff}
/\x{10000}/
/\o{20000}/
# Check maximum character size for the 32-bit library. These will all give
# errors in the 16-bit library.
/\x{110000}/
/\x{7fffffff}/
/\x{80000000}/
/\x{ffffffff}/
/\x{100000000}/
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
/\o{17777777777}/
/\o{20000000000}/
/\o{37777777777}/
/\o{40000000000}/
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
/\x{7fffffff}\x{7fffffff}/I
Capturing subpattern count = 0
No options
First code unit = \x{7fffffff}
Last code unit = \x{7fffffff}
Subject length lower bound = 2
/\x{80000000}\x{80000000}/I
Capturing subpattern count = 0
No options
First code unit = \x{80000000}
Last code unit = \x{80000000}
Subject length lower bound = 2
/\x{ffffffff}\x{ffffffff}/I
Capturing subpattern count = 0
No options
First code unit = \x{ffffffff}
Last code unit = \x{ffffffff}
Subject length lower bound = 2
# Non-UTF characters
/\C{2,3}/
\x{400000}\x{400001}\x{400002}\x{400003}
0: \x{400000}\x{400001}\x{400002}
/\x{400000}\x{800000}/IBi
------------------------------------------------------------------
Bra
/i \x{400000}\x{800000}
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: caseless
First code unit = \x{400000}
Last code unit = \x{800000}
Subject length lower bound = 2
# Check character ranges
/[\H]/IB
------------------------------------------------------------------
Bra
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
\x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
\xfa \xfb \xfc \xfd \xfe \xff
No last code unit
Subject length lower bound = 1
/[\V]/IB
------------------------------------------------------------------
Bra
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}]
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
No options
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
\x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
\x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
\xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
\xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
\xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
\xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
\xfc \xfd \xfe \xff
No last code unit
Subject length lower bound = 1
# End of testinput11

1159
testdata/testoutput12-16 vendored Normal file

File diff suppressed because it is too large Load Diff

1157
testdata/testoutput12-32 vendored Normal file

File diff suppressed because it is too large Load Diff

27
testdata/testoutput13 vendored Normal file
View File

@ -0,0 +1,27 @@
# These DFA tests are for the handling of characters greater than 255 in
# 16-bit or 32-bit, non-UTF mode.
#forbid_utf
#subject dfa
/^\x{ffff}+/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}?/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}*/i
\x{ffff}
0: \x{ffff}
/^\x{ffff}{3}/i
\x{ffff}\x{ffff}\x{ffff}
0: \x{ffff}\x{ffff}\x{ffff}
/^\x{ffff}{0,3}/i
\x{ffff}
0: \x{ffff}
# End of testinput13

View File

@ -6723,7 +6723,7 @@ Subject length lower bound = 5
1: \x0d
2: \x0a
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
Capturing subpattern count = 1
May match empty string
No options

View File

@ -1281,13 +1281,13 @@ Partial match: abcde
X\=ps
Partial match: X
/\sxxx\s/utf,tables=1
/\sxxx\s/utf,tables=2
AB\x{85}xxx\x{a0}XYZ
No match
0: \x{85}xxx\x{a0}
AB\x{a0}xxx\x{85}XYZ
No match
0: \x{a0}xxx\x{85}
/\S \S/utf,tables=1
/\S \S/utf,tables=2
\x{a2} \x{84}
0: \x{a2} \x{84}

745
testdata/testoutput8-16 vendored Normal file
View File

@ -0,0 +1,745 @@
# These are a few representative patterns whose lengths and offsets are to be
# shown when the link size is 2. This is just a doublecheck test to ensure the
# sizes don't go horribly wrong when something is changed. The pattern contents
# are all themselves checked in other tests. Unicode, including property
# support, is required for these tests.
#pattern fullbincode,memory
/((?i)b)/
Memory allocation (code space): 24
------------------------------------------------------------------
0 9 Bra
2 5 CBra 1
5 /i b
7 5 Ket
9 9 Ket
11 End
------------------------------------------------------------------
/(?s)(.*X|^B)/
Memory allocation (code space): 38
------------------------------------------------------------------
0 16 Bra
2 7 CBra 1
5 AllAny*
7 X
9 5 Alt
11 ^
12 B
14 12 Ket
16 16 Ket
18 End
------------------------------------------------------------------
/(?s:.*X|^B)/
Memory allocation (code space): 36
------------------------------------------------------------------
0 15 Bra
2 6 Bra
4 AllAny*
6 X
8 5 Alt
10 ^
11 B
13 11 Ket
15 15 Ket
17 End
------------------------------------------------------------------
/^[[:alnum:]]/
Memory allocation (code space): 46
------------------------------------------------------------------
0 20 Bra
2 ^
3 [0-9A-Za-z]
20 20 Ket
22 End
------------------------------------------------------------------
/#/Ix
Memory allocation (code space): 10
------------------------------------------------------------------
0 2 Bra
2 2 Ket
4 End
------------------------------------------------------------------
Capturing subpattern count = 0
May match empty string
Options: extended
No first code unit
No last code unit
Subject length lower bound = 0
/a#/Ix
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 a
4 4 Ket
6 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: extended
First code unit = 'a'
No last code unit
Subject length lower bound = 1
/x?+/
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 x?+
4 4 Ket
6 End
------------------------------------------------------------------
/x++/
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 x++
4 4 Ket
6 End
------------------------------------------------------------------
/x{1,3}+/
Memory allocation (code space): 20
------------------------------------------------------------------
0 7 Bra
2 x
4 x{0,2}+
7 7 Ket
9 End
------------------------------------------------------------------
/(x)*+/
Memory allocation (code space): 26
------------------------------------------------------------------
0 10 Bra
2 Braposzero
3 5 CBraPos 1
6 x
8 5 KetRpos
10 10 Ket
12 End
------------------------------------------------------------------
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
Memory allocation (code space): 142
------------------------------------------------------------------
0 68 Bra
2 ^
3 63 CBra 1
6 5 CBra 2
9 a+
11 5 Ket
13 21 CBra 3
16 [ab]+?
34 21 Ket
36 21 CBra 4
39 [bc]+
57 21 Ket
59 5 CBra 5
62 \w*+
64 5 Ket
66 63 Ket
68 68 Ket
70 End
------------------------------------------------------------------
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
Memory allocation (code space): 1648
------------------------------------------------------------------
0 821 Bra
2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
820 \b
821 821 Ket
823 End
------------------------------------------------------------------
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
Memory allocation (code space): 1628
------------------------------------------------------------------
0 811 Bra
2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
810 \b
811 811 Ket
813 End
------------------------------------------------------------------
/(a(?1)b)/
Memory allocation (code space): 32
------------------------------------------------------------------
0 13 Bra
2 9 CBra 1
5 a
7 2 Recurse
9 b
11 9 Ket
13 13 Ket
15 End
------------------------------------------------------------------
/(a(?1)+b)/
Memory allocation (code space): 40
------------------------------------------------------------------
0 17 Bra
2 13 CBra 1
5 a
7 4 Once
9 2 Recurse
11 4 KetRmax
13 b
15 13 Ket
17 17 Ket
19 End
------------------------------------------------------------------
/a(?P<name1>b|c)d(?P<longername2>e)/
Memory allocation (code space): 54
------------------------------------------------------------------
0 24 Bra
2 a
4 5 CBra 1
7 b
9 4 Alt
11 c
13 9 Ket
15 d
17 5 CBra 2
20 e
22 5 Ket
24 24 Ket
26 End
------------------------------------------------------------------
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
Memory allocation (code space): 64
------------------------------------------------------------------
0 29 Bra
2 18 Bra
4 a
6 12 CBra 1
9 c
11 5 CBra 2
14 d
16 5 Ket
18 12 Ket
20 18 Ket
22 5 CBra 3
25 a
27 5 Ket
29 29 Ket
31 End
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/
Memory allocation (code space): 54
------------------------------------------------------------------
0 24 Bra
2 5 CBra 1
5 a
7 5 Ket
9 Any
10 Any
11 Any
12 \1
14 bbb
20 2 Recurse
22 d
24 24 Ket
26 End
------------------------------------------------------------------
/abc(?C255)de(?C)f/
Memory allocation (code space): 50
------------------------------------------------------------------
0 22 Bra
2 abc
8 Callout 255 10 1
12 de
16 Callout 0 16 1
20 f
22 22 Ket
24 End
------------------------------------------------------------------
/abcde/auto_callout
Memory allocation (code space): 78
------------------------------------------------------------------
0 36 Bra
2 Callout 255 0 1
6 a
8 Callout 255 1 1
12 b
14 Callout 255 2 1
18 c
20 Callout 255 3 1
24 d
26 Callout 255 4 1
30 e
32 Callout 255 5 0
36 36 Ket
38 End
------------------------------------------------------------------
/\x{100}/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{100}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{1000}/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{1000}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{10000}/utf
Memory allocation (code space): 16
------------------------------------------------------------------
0 5 Bra
2 \x{10000}
5 5 Ket
7 End
------------------------------------------------------------------
/\x{100000}/utf
Memory allocation (code space): 16
------------------------------------------------------------------
0 5 Bra
2 \x{100000}
5 5 Ket
7 End
------------------------------------------------------------------
/\x{10ffff}/utf
Memory allocation (code space): 16
------------------------------------------------------------------
0 5 Bra
2 \x{10ffff}
5 5 Ket
7 End
------------------------------------------------------------------
/\x{110000}/utf
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
/[\x{ff}]/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{ff}
4 4 Ket
6 End
------------------------------------------------------------------
/[\x{100}]/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{100}
4 4 Ket
6 End
------------------------------------------------------------------
/\x80/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{80}
4 4 Ket
6 End
------------------------------------------------------------------
/\xff/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{ff}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
Memory allocation (code space): 26
------------------------------------------------------------------
0 10 Bra
2 A\x{2262}\x{391}.
10 10 Ket
12 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
Subject length lower bound = 4
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
Memory allocation (code space): 22
------------------------------------------------------------------
0 8 Bra
2 \x{d55c}\x{ad6d}\x{c5b4}
8 8 Ket
10 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
Subject length lower bound = 3
/\x{65e5}\x{672c}\x{8a9e}/I,utf
Memory allocation (code space): 22
------------------------------------------------------------------
0 8 Bra
2 \x{65e5}\x{672c}\x{8a9e}
8 8 Ket
10 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
Subject length lower bound = 3
/[\x{100}]/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{100}
4 4 Ket
6 End
------------------------------------------------------------------
/[Z\x{100}]/utf
Memory allocation (code space): 54
------------------------------------------------------------------
0 24 Bra
2 [Z\x{100}]
24 24 Ket
26 End
------------------------------------------------------------------
/^[\x{100}\E-\Q\E\x{150}]/utf
Memory allocation (code space): 26
------------------------------------------------------------------
0 10 Bra
2 ^
3 [\x{100}-\x{150}]
10 10 Ket
12 End
------------------------------------------------------------------
/^[\QĀ\E-\QŐ\E]/utf
Memory allocation (code space): 26
------------------------------------------------------------------
0 10 Bra
2 ^
3 [\x{100}-\x{150}]
10 10 Ket
12 End
------------------------------------------------------------------
/^[\QĀ\E-\QŐ\E/utf
Failed: error 106 at offset 13: missing terminating ] for character class
/[\p{L}]/
Memory allocation (code space): 24
------------------------------------------------------------------
0 9 Bra
2 [\p{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\p{^L}]/
Memory allocation (code space): 24
------------------------------------------------------------------
0 9 Bra
2 [\P{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\P{L}]/
Memory allocation (code space): 24
------------------------------------------------------------------
0 9 Bra
2 [\P{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\P{^L}]/
Memory allocation (code space): 24
------------------------------------------------------------------
0 9 Bra
2 [\p{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[abc\p{L}\x{0660}]/utf
Memory allocation (code space): 60
------------------------------------------------------------------
0 27 Bra
2 [a-c\p{L}\x{660}]
27 27 Ket
29 End
------------------------------------------------------------------
/[\p{Nd}]/utf
Memory allocation (code space): 24
------------------------------------------------------------------
0 9 Bra
2 [\p{Nd}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\p{Nd}+-]+/utf
Memory allocation (code space): 58
------------------------------------------------------------------
0 26 Bra
2 [+\-\p{Nd}]++
26 26 Ket
28 End
------------------------------------------------------------------
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
Memory allocation (code space): 32
------------------------------------------------------------------
0 13 Bra
2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
13 13 Ket
15 End
------------------------------------------------------------------
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
Memory allocation (code space): 32
------------------------------------------------------------------
0 13 Bra
2 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
13 13 Ket
15 End
------------------------------------------------------------------
/[\x{105}-\x{109}]/i,utf
Memory allocation (code space): 24
------------------------------------------------------------------
0 9 Bra
2 [\x{104}-\x{109}]
9 9 Ket
11 End
------------------------------------------------------------------
/( ( (?(1)0|) )* )/x
Memory allocation (code space): 52
------------------------------------------------------------------
0 23 Bra
2 19 CBra 1
5 Brazero
6 13 SCBra 2
9 6 Cond
11 1 Cond ref
13 0
15 2 Alt
17 8 Ket
19 13 KetRmax
21 19 Ket
23 23 Ket
25 End
------------------------------------------------------------------
/( (?(1)0|)* )/x
Memory allocation (code space): 42
------------------------------------------------------------------
0 18 Bra
2 14 CBra 1
5 Brazero
6 6 SCond
8 1 Cond ref
10 0
12 2 Alt
14 8 KetRmax
16 14 Ket
18 18 Ket
20 End
------------------------------------------------------------------
/[a]/
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 a
4 4 Ket
6 End
------------------------------------------------------------------
/[a]/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 a
4 4 Ket
6 End
------------------------------------------------------------------
/[\xaa]/
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{aa}
4 4 Ket
6 End
------------------------------------------------------------------
/[\xaa]/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 \x{aa}
4 4 Ket
6 End
------------------------------------------------------------------
/[^a]/
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 [^a]
4 4 Ket
6 End
------------------------------------------------------------------
/[^a]/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 [^a]
4 4 Ket
6 End
------------------------------------------------------------------
/[^\xaa]/
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 [^\x{aa}]
4 4 Ket
6 End
------------------------------------------------------------------
/[^\xaa]/utf
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
2 [^\x{aa}]
4 4 Ket
6 End
------------------------------------------------------------------
#pattern -memory
/[^\d]/utf,ucp
------------------------------------------------------------------
0 9 Bra
2 [^\p{Nd}]
9 9 Ket
11 End
------------------------------------------------------------------
/[[:^alpha:][:^cntrl:]]+/utf,ucp
------------------------------------------------------------------
0 26 Bra
2 [ -~\x80-\xff\P{L}]++
26 26 Ket
28 End
------------------------------------------------------------------
/[[:^cntrl:][:^alpha:]]+/utf,ucp
------------------------------------------------------------------
0 26 Bra
2 [ -~\x80-\xff\P{L}]++
26 26 Ket
28 End
------------------------------------------------------------------
/[[:alpha:]]+/utf,ucp
------------------------------------------------------------------
0 10 Bra
2 [\p{L}]++
10 10 Ket
12 End
------------------------------------------------------------------
/[[:^alpha:]\S]+/utf,ucp
------------------------------------------------------------------
0 13 Bra
2 [\P{L}\P{Xsp}]++
13 13 Ket
15 End
------------------------------------------------------------------
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
------------------------------------------------------------------
0 60 Bra
2 abc
8 5 CBra 1
11 d
13 4 Alt
15 e
17 9 Ket
19 *THEN
20 x
22 12 CBra 2
25 123
31 *THEN
32 4
34 24 Alt
36 567
42 5 CBra 3
45 b
47 4 Alt
49 q
51 9 Ket
53 *THEN
54 xx
58 36 Ket
60 60 Ket
62 End
------------------------------------------------------------------
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
0 39 Bra
2 Brazero
3 32 SCBra 1
6 27 Once
8 12 CBra 2
11 7 CBra 3
14 a
16 \2
18 7 Ket
20 11 Alt
22 5 CBra 4
25 a*
27 5 Ket
29 22 Recurse
31 23 Ket
33 27 Ket
35 32 KetRmax
37 a?+
39 39 Ket
41 End
------------------------------------------------------------------
# End of testinput8

745
testdata/testoutput8-32 vendored Normal file
View File

@ -0,0 +1,745 @@
# These are a few representative patterns whose lengths and offsets are to be
# shown when the link size is 2. This is just a doublecheck test to ensure the
# sizes don't go horribly wrong when something is changed. The pattern contents
# are all themselves checked in other tests. Unicode, including property
# support, is required for these tests.
#pattern fullbincode,memory
/((?i)b)/
Memory allocation (code space): 48
------------------------------------------------------------------
0 9 Bra
2 5 CBra 1
5 /i b
7 5 Ket
9 9 Ket
11 End
------------------------------------------------------------------
/(?s)(.*X|^B)/
Memory allocation (code space): 76
------------------------------------------------------------------
0 16 Bra
2 7 CBra 1
5 AllAny*
7 X
9 5 Alt
11 ^
12 B
14 12 Ket
16 16 Ket
18 End
------------------------------------------------------------------
/(?s:.*X|^B)/
Memory allocation (code space): 72
------------------------------------------------------------------
0 15 Bra
2 6 Bra
4 AllAny*
6 X
8 5 Alt
10 ^
11 B
13 11 Ket
15 15 Ket
17 End
------------------------------------------------------------------
/^[[:alnum:]]/
Memory allocation (code space): 60
------------------------------------------------------------------
0 12 Bra
2 ^
3 [0-9A-Za-z]
12 12 Ket
14 End
------------------------------------------------------------------
/#/Ix
Memory allocation (code space): 20
------------------------------------------------------------------
0 2 Bra
2 2 Ket
4 End
------------------------------------------------------------------
Capturing subpattern count = 0
May match empty string
Options: extended
No first code unit
No last code unit
Subject length lower bound = 0
/a#/Ix
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 a
4 4 Ket
6 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: extended
First code unit = 'a'
No last code unit
Subject length lower bound = 1
/x?+/
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 x?+
4 4 Ket
6 End
------------------------------------------------------------------
/x++/
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 x++
4 4 Ket
6 End
------------------------------------------------------------------
/x{1,3}+/
Memory allocation (code space): 40
------------------------------------------------------------------
0 7 Bra
2 x
4 x{0,2}+
7 7 Ket
9 End
------------------------------------------------------------------
/(x)*+/
Memory allocation (code space): 52
------------------------------------------------------------------
0 10 Bra
2 Braposzero
3 5 CBraPos 1
6 x
8 5 KetRpos
10 10 Ket
12 End
------------------------------------------------------------------
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
Memory allocation (code space): 220
------------------------------------------------------------------
0 52 Bra
2 ^
3 47 CBra 1
6 5 CBra 2
9 a+
11 5 Ket
13 13 CBra 3
16 [ab]+?
26 13 Ket
28 13 CBra 4
31 [bc]+
41 13 Ket
43 5 CBra 5
46 \w*+
48 5 Ket
50 47 Ket
52 52 Ket
54 End
------------------------------------------------------------------
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
Memory allocation (code space): 3296
------------------------------------------------------------------
0 821 Bra
2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
820 \b
821 821 Ket
823 End
------------------------------------------------------------------
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
Memory allocation (code space): 3256
------------------------------------------------------------------
0 811 Bra
2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
810 \b
811 811 Ket
813 End
------------------------------------------------------------------
/(a(?1)b)/
Memory allocation (code space): 64
------------------------------------------------------------------
0 13 Bra
2 9 CBra 1
5 a
7 2 Recurse
9 b
11 9 Ket
13 13 Ket
15 End
------------------------------------------------------------------
/(a(?1)+b)/
Memory allocation (code space): 80
------------------------------------------------------------------
0 17 Bra
2 13 CBra 1
5 a
7 4 Once
9 2 Recurse
11 4 KetRmax
13 b
15 13 Ket
17 17 Ket
19 End
------------------------------------------------------------------
/a(?P<name1>b|c)d(?P<longername2>e)/
Memory allocation (code space): 108
------------------------------------------------------------------
0 24 Bra
2 a
4 5 CBra 1
7 b
9 4 Alt
11 c
13 9 Ket
15 d
17 5 CBra 2
20 e
22 5 Ket
24 24 Ket
26 End
------------------------------------------------------------------
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
Memory allocation (code space): 128
------------------------------------------------------------------
0 29 Bra
2 18 Bra
4 a
6 12 CBra 1
9 c
11 5 CBra 2
14 d
16 5 Ket
18 12 Ket
20 18 Ket
22 5 CBra 3
25 a
27 5 Ket
29 29 Ket
31 End
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/
Memory allocation (code space): 108
------------------------------------------------------------------
0 24 Bra
2 5 CBra 1
5 a
7 5 Ket
9 Any
10 Any
11 Any
12 \1
14 bbb
20 2 Recurse
22 d
24 24 Ket
26 End
------------------------------------------------------------------
/abc(?C255)de(?C)f/
Memory allocation (code space): 100
------------------------------------------------------------------
0 22 Bra
2 abc
8 Callout 255 10 1
12 de
16 Callout 0 16 1
20 f
22 22 Ket
24 End
------------------------------------------------------------------
/abcde/auto_callout
Memory allocation (code space): 156
------------------------------------------------------------------
0 36 Bra
2 Callout 255 0 1
6 a
8 Callout 255 1 1
12 b
14 Callout 255 2 1
18 c
20 Callout 255 3 1
24 d
26 Callout 255 4 1
30 e
32 Callout 255 5 0
36 36 Ket
38 End
------------------------------------------------------------------
/\x{100}/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{100}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{1000}/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{1000}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{10000}/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{10000}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{100000}/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{100000}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{10ffff}/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{10ffff}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{110000}/utf
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
/[\x{ff}]/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{ff}
4 4 Ket
6 End
------------------------------------------------------------------
/[\x{100}]/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{100}
4 4 Ket
6 End
------------------------------------------------------------------
/\x80/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{80}
4 4 Ket
6 End
------------------------------------------------------------------
/\xff/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{ff}
4 4 Ket
6 End
------------------------------------------------------------------
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
Memory allocation (code space): 52
------------------------------------------------------------------
0 10 Bra
2 A\x{2262}\x{391}.
10 10 Ket
12 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
Subject length lower bound = 4
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
Memory allocation (code space): 44
------------------------------------------------------------------
0 8 Bra
2 \x{d55c}\x{ad6d}\x{c5b4}
8 8 Ket
10 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = \x{d55c}
Last code unit = \x{c5b4}
Subject length lower bound = 3
/\x{65e5}\x{672c}\x{8a9e}/I,utf
Memory allocation (code space): 44
------------------------------------------------------------------
0 8 Bra
2 \x{65e5}\x{672c}\x{8a9e}
8 8 Ket
10 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = \x{65e5}
Last code unit = \x{8a9e}
Subject length lower bound = 3
/[\x{100}]/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{100}
4 4 Ket
6 End
------------------------------------------------------------------
/[Z\x{100}]/utf
Memory allocation (code space): 76
------------------------------------------------------------------
0 16 Bra
2 [Z\x{100}]
16 16 Ket
18 End
------------------------------------------------------------------
/^[\x{100}\E-\Q\E\x{150}]/utf
Memory allocation (code space): 52
------------------------------------------------------------------
0 10 Bra
2 ^
3 [\x{100}-\x{150}]
10 10 Ket
12 End
------------------------------------------------------------------
/^[\QĀ\E-\QŐ\E]/utf
Memory allocation (code space): 52
------------------------------------------------------------------
0 10 Bra
2 ^
3 [\x{100}-\x{150}]
10 10 Ket
12 End
------------------------------------------------------------------
/^[\QĀ\E-\QŐ\E/utf
Failed: error 106 at offset 13: missing terminating ] for character class
/[\p{L}]/
Memory allocation (code space): 48
------------------------------------------------------------------
0 9 Bra
2 [\p{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\p{^L}]/
Memory allocation (code space): 48
------------------------------------------------------------------
0 9 Bra
2 [\P{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\P{L}]/
Memory allocation (code space): 48
------------------------------------------------------------------
0 9 Bra
2 [\P{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\P{^L}]/
Memory allocation (code space): 48
------------------------------------------------------------------
0 9 Bra
2 [\p{L}]
9 9 Ket
11 End
------------------------------------------------------------------
/[abc\p{L}\x{0660}]/utf
Memory allocation (code space): 88
------------------------------------------------------------------
0 19 Bra
2 [a-c\p{L}\x{660}]
19 19 Ket
21 End
------------------------------------------------------------------
/[\p{Nd}]/utf
Memory allocation (code space): 48
------------------------------------------------------------------
0 9 Bra
2 [\p{Nd}]
9 9 Ket
11 End
------------------------------------------------------------------
/[\p{Nd}+-]+/utf
Memory allocation (code space): 84
------------------------------------------------------------------
0 18 Bra
2 [+\-\p{Nd}]++
18 18 Ket
20 End
------------------------------------------------------------------
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
Memory allocation (code space): 60
------------------------------------------------------------------
0 12 Bra
2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
12 12 Ket
14 End
------------------------------------------------------------------
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
Memory allocation (code space): 60
------------------------------------------------------------------
0 12 Bra
2 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
12 12 Ket
14 End
------------------------------------------------------------------
/[\x{105}-\x{109}]/i,utf
Memory allocation (code space): 48
------------------------------------------------------------------
0 9 Bra
2 [\x{104}-\x{109}]
9 9 Ket
11 End
------------------------------------------------------------------
/( ( (?(1)0|) )* )/x
Memory allocation (code space): 104
------------------------------------------------------------------
0 23 Bra
2 19 CBra 1
5 Brazero
6 13 SCBra 2
9 6 Cond
11 1 Cond ref
13 0
15 2 Alt
17 8 Ket
19 13 KetRmax
21 19 Ket
23 23 Ket
25 End
------------------------------------------------------------------
/( (?(1)0|)* )/x
Memory allocation (code space): 84
------------------------------------------------------------------
0 18 Bra
2 14 CBra 1
5 Brazero
6 6 SCond
8 1 Cond ref
10 0
12 2 Alt
14 8 KetRmax
16 14 Ket
18 18 Ket
20 End
------------------------------------------------------------------
/[a]/
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 a
4 4 Ket
6 End
------------------------------------------------------------------
/[a]/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 a
4 4 Ket
6 End
------------------------------------------------------------------
/[\xaa]/
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{aa}
4 4 Ket
6 End
------------------------------------------------------------------
/[\xaa]/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 \x{aa}
4 4 Ket
6 End
------------------------------------------------------------------
/[^a]/
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 [^a]
4 4 Ket
6 End
------------------------------------------------------------------
/[^a]/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 [^a]
4 4 Ket
6 End
------------------------------------------------------------------
/[^\xaa]/
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 [^\x{aa}]
4 4 Ket
6 End
------------------------------------------------------------------
/[^\xaa]/utf
Memory allocation (code space): 28
------------------------------------------------------------------
0 4 Bra
2 [^\x{aa}]
4 4 Ket
6 End
------------------------------------------------------------------
#pattern -memory
/[^\d]/utf,ucp
------------------------------------------------------------------
0 9 Bra
2 [^\p{Nd}]
9 9 Ket
11 End
------------------------------------------------------------------
/[[:^alpha:][:^cntrl:]]+/utf,ucp
------------------------------------------------------------------
0 18 Bra
2 [ -~\x80-\xff\P{L}]++
18 18 Ket
20 End
------------------------------------------------------------------
/[[:^cntrl:][:^alpha:]]+/utf,ucp
------------------------------------------------------------------
0 18 Bra
2 [ -~\x80-\xff\P{L}]++
18 18 Ket
20 End
------------------------------------------------------------------
/[[:alpha:]]+/utf,ucp
------------------------------------------------------------------
0 10 Bra
2 [\p{L}]++
10 10 Ket
12 End
------------------------------------------------------------------
/[[:^alpha:]\S]+/utf,ucp
------------------------------------------------------------------
0 13 Bra
2 [\P{L}\P{Xsp}]++
13 13 Ket
15 End
------------------------------------------------------------------
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
------------------------------------------------------------------
0 60 Bra
2 abc
8 5 CBra 1
11 d
13 4 Alt
15 e
17 9 Ket
19 *THEN
20 x
22 12 CBra 2
25 123
31 *THEN
32 4
34 24 Alt
36 567
42 5 CBra 3
45 b
47 4 Alt
49 q
51 9 Ket
53 *THEN
54 xx
58 36 Ket
60 60 Ket
62 End
------------------------------------------------------------------
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
0 39 Bra
2 Brazero
3 32 SCBra 1
6 27 Once
8 12 CBra 2
11 7 CBra 3
14 a
16 \2
18 7 Ket
20 11 Alt
22 5 CBra 4
25 a*
27 5 Ket
29 22 Recurse
31 23 Ket
33 27 Ket
35 32 KetRmax
37 a?+
39 39 Ket
41 End
------------------------------------------------------------------
# End of testinput8

745
testdata/testoutput8-8 vendored Normal file
View File

@ -0,0 +1,745 @@
# These are a few representative patterns whose lengths and offsets are to be
# shown when the link size is 2. This is just a doublecheck test to ensure the
# sizes don't go horribly wrong when something is changed. The pattern contents
# are all themselves checked in other tests. Unicode, including property
# support, is required for these tests.
#pattern fullbincode,memory
/((?i)b)/
Memory allocation (code space): 17
------------------------------------------------------------------
0 13 Bra
3 7 CBra 1
8 /i b
10 7 Ket
13 13 Ket
16 End
------------------------------------------------------------------
/(?s)(.*X|^B)/
Memory allocation (code space): 25
------------------------------------------------------------------
0 21 Bra
3 9 CBra 1
8 AllAny*
10 X
12 6 Alt
15 ^
16 B
18 15 Ket
21 21 Ket
24 End
------------------------------------------------------------------
/(?s:.*X|^B)/
Memory allocation (code space): 23
------------------------------------------------------------------
0 19 Bra
3 7 Bra
6 AllAny*
8 X
10 6 Alt
13 ^
14 B
16 13 Ket
19 19 Ket
22 End
------------------------------------------------------------------
/^[[:alnum:]]/
Memory allocation (code space): 41
------------------------------------------------------------------
0 37 Bra
3 ^
4 [0-9A-Za-z]
37 37 Ket
40 End
------------------------------------------------------------------
/#/Ix
Memory allocation (code space): 7
------------------------------------------------------------------
0 3 Bra
3 3 Ket
6 End
------------------------------------------------------------------
Capturing subpattern count = 0
May match empty string
Options: extended
No first code unit
No last code unit
Subject length lower bound = 0
/a#/Ix
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 a
5 5 Ket
8 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: extended
First code unit = 'a'
No last code unit
Subject length lower bound = 1
/x?+/
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 x?+
5 5 Ket
8 End
------------------------------------------------------------------
/x++/
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 x++
5 5 Ket
8 End
------------------------------------------------------------------
/x{1,3}+/
Memory allocation (code space): 13
------------------------------------------------------------------
0 9 Bra
3 x
5 x{0,2}+
9 9 Ket
12 End
------------------------------------------------------------------
/(x)*+/
Memory allocation (code space): 18
------------------------------------------------------------------
0 14 Bra
3 Braposzero
4 7 CBraPos 1
9 x
11 7 KetRpos
14 14 Ket
17 End
------------------------------------------------------------------
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
Memory allocation (code space): 120
------------------------------------------------------------------
0 116 Bra
3 ^
4 109 CBra 1
9 7 CBra 2
14 a+
16 7 Ket
19 39 CBra 3
24 [ab]+?
58 39 Ket
61 39 CBra 4
66 [bc]+
100 39 Ket
103 7 CBra 5
108 \w*+
110 7 Ket
113 109 Ket
116 116 Ket
119 End
------------------------------------------------------------------
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
Memory allocation (code space): 826
------------------------------------------------------------------
0 822 Bra
3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
821 \b
822 822 Ket
825 End
------------------------------------------------------------------
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
Memory allocation (code space): 816
------------------------------------------------------------------
0 812 Bra
3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
811 \b
812 812 Ket
815 End
------------------------------------------------------------------
/(a(?1)b)/
Memory allocation (code space): 22
------------------------------------------------------------------
0 18 Bra
3 12 CBra 1
8 a
10 3 Recurse
13 b
15 12 Ket
18 18 Ket
21 End
------------------------------------------------------------------
/(a(?1)+b)/
Memory allocation (code space): 28
------------------------------------------------------------------
0 24 Bra
3 18 CBra 1
8 a
10 6 Once
13 3 Recurse
16 6 KetRmax
19 b
21 18 Ket
24 24 Ket
27 End
------------------------------------------------------------------
/a(?P<name1>b|c)d(?P<longername2>e)/
Memory allocation (code space): 36
------------------------------------------------------------------
0 32 Bra
3 a
5 7 CBra 1
10 b
12 5 Alt
15 c
17 12 Ket
20 d
22 7 CBra 2
27 e
29 7 Ket
32 32 Ket
35 End
------------------------------------------------------------------
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
Memory allocation (code space): 45
------------------------------------------------------------------
0 41 Bra
3 25 Bra
6 a
8 17 CBra 1
13 c
15 7 CBra 2
20 d
22 7 Ket
25 17 Ket
28 25 Ket
31 7 CBra 3
36 a
38 7 Ket
41 41 Ket
44 End
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/
Memory allocation (code space): 34
------------------------------------------------------------------
0 30 Bra
3 7 CBra 1
8 a
10 7 Ket
13 Any
14 Any
15 Any
16 \1
19 bbb
25 3 Recurse
28 d
30 30 Ket
33 End
------------------------------------------------------------------
/abc(?C255)de(?C)f/
Memory allocation (code space): 31
------------------------------------------------------------------
0 27 Bra
3 abc
9 Callout 255 10 1
15 de
19 Callout 0 16 1
25 f
27 27 Ket
30 End
------------------------------------------------------------------
/abcde/auto_callout
Memory allocation (code space): 53
------------------------------------------------------------------
0 49 Bra
3 Callout 255 0 1
9 a
11 Callout 255 1 1
17 b
19 Callout 255 2 1
25 c
27 Callout 255 3 1
33 d
35 Callout 255 4 1
41 e
43 Callout 255 5 0
49 49 Ket
52 End
------------------------------------------------------------------
/\x{100}/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 \x{100}
6 6 Ket
9 End
------------------------------------------------------------------
/\x{1000}/utf
Memory allocation (code space): 11
------------------------------------------------------------------
0 7 Bra
3 \x{1000}
7 7 Ket
10 End
------------------------------------------------------------------
/\x{10000}/utf
Memory allocation (code space): 12
------------------------------------------------------------------
0 8 Bra
3 \x{10000}
8 8 Ket
11 End
------------------------------------------------------------------
/\x{100000}/utf
Memory allocation (code space): 12
------------------------------------------------------------------
0 8 Bra
3 \x{100000}
8 8 Ket
11 End
------------------------------------------------------------------
/\x{10ffff}/utf
Memory allocation (code space): 12
------------------------------------------------------------------
0 8 Bra
3 \x{10ffff}
8 8 Ket
11 End
------------------------------------------------------------------
/\x{110000}/utf
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
/[\x{ff}]/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 \x{ff}
6 6 Ket
9 End
------------------------------------------------------------------
/[\x{100}]/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 \x{100}
6 6 Ket
9 End
------------------------------------------------------------------
/\x80/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 \x{80}
6 6 Ket
9 End
------------------------------------------------------------------
/\xff/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 \x{ff}
6 6 Ket
9 End
------------------------------------------------------------------
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
Memory allocation (code space): 18
------------------------------------------------------------------
0 14 Bra
3 A\x{2262}\x{391}.
14 14 Ket
17 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = 'A'
Last code unit = '.'
Subject length lower bound = 4
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
Memory allocation (code space): 19
------------------------------------------------------------------
0 15 Bra
3 \x{d55c}\x{ad6d}\x{c5b4}
15 15 Ket
18 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = \xed
Last code unit = \xb4
Subject length lower bound = 3
/\x{65e5}\x{672c}\x{8a9e}/I,utf
Memory allocation (code space): 19
------------------------------------------------------------------
0 15 Bra
3 \x{65e5}\x{672c}\x{8a9e}
15 15 Ket
18 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf
First code unit = \xe6
Last code unit = \x9e
Subject length lower bound = 3
/[\x{100}]/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 \x{100}
6 6 Ket
9 End
------------------------------------------------------------------
/[Z\x{100}]/utf
Memory allocation (code space): 47
------------------------------------------------------------------
0 43 Bra
3 [Z\x{100}]
43 43 Ket
46 End
------------------------------------------------------------------
/^[\x{100}\E-\Q\E\x{150}]/utf
Memory allocation (code space): 18
------------------------------------------------------------------
0 14 Bra
3 ^
4 [\x{100}-\x{150}]
14 14 Ket
17 End
------------------------------------------------------------------
/^[\QĀ\E-\QŐ\E]/utf
Memory allocation (code space): 18
------------------------------------------------------------------
0 14 Bra
3 ^
4 [\x{100}-\x{150}]
14 14 Ket
17 End
------------------------------------------------------------------
/^[\QĀ\E-\QŐ\E/utf
Failed: error 106 at offset 15: missing terminating ] for character class
/[\p{L}]/
Memory allocation (code space): 15
------------------------------------------------------------------
0 11 Bra
3 [\p{L}]
11 11 Ket
14 End
------------------------------------------------------------------
/[\p{^L}]/
Memory allocation (code space): 15
------------------------------------------------------------------
0 11 Bra
3 [\P{L}]
11 11 Ket
14 End
------------------------------------------------------------------
/[\P{L}]/
Memory allocation (code space): 15
------------------------------------------------------------------
0 11 Bra
3 [\P{L}]
11 11 Ket
14 End
------------------------------------------------------------------
/[\P{^L}]/
Memory allocation (code space): 15
------------------------------------------------------------------
0 11 Bra
3 [\p{L}]
11 11 Ket
14 End
------------------------------------------------------------------
/[abc\p{L}\x{0660}]/utf
Memory allocation (code space): 50
------------------------------------------------------------------
0 46 Bra
3 [a-c\p{L}\x{660}]
46 46 Ket
49 End
------------------------------------------------------------------
/[\p{Nd}]/utf
Memory allocation (code space): 15
------------------------------------------------------------------
0 11 Bra
3 [\p{Nd}]
11 11 Ket
14 End
------------------------------------------------------------------
/[\p{Nd}+-]+/utf
Memory allocation (code space): 48
------------------------------------------------------------------
0 44 Bra
3 [+\-\p{Nd}]++
44 44 Ket
47 End
------------------------------------------------------------------
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
Memory allocation (code space): 25
------------------------------------------------------------------
0 21 Bra
3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
21 21 Ket
24 End
------------------------------------------------------------------
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
Memory allocation (code space): 25
------------------------------------------------------------------
0 21 Bra
3 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
21 21 Ket
24 End
------------------------------------------------------------------
/[\x{105}-\x{109}]/i,utf
Memory allocation (code space): 17
------------------------------------------------------------------
0 13 Bra
3 [\x{104}-\x{109}]
13 13 Ket
16 End
------------------------------------------------------------------
/( ( (?(1)0|) )* )/x
Memory allocation (code space): 38
------------------------------------------------------------------
0 34 Bra
3 28 CBra 1
8 Brazero
9 19 SCBra 2
14 8 Cond
17 1 Cond ref
20 0
22 3 Alt
25 11 Ket
28 19 KetRmax
31 28 Ket
34 34 Ket
37 End
------------------------------------------------------------------
/( (?(1)0|)* )/x
Memory allocation (code space): 30
------------------------------------------------------------------
0 26 Bra
3 20 CBra 1
8 Brazero
9 8 SCond
12 1 Cond ref
15 0
17 3 Alt
20 11 KetRmax
23 20 Ket
26 26 Ket
29 End
------------------------------------------------------------------
/[a]/
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 a
5 5 Ket
8 End
------------------------------------------------------------------
/[a]/utf
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 a
5 5 Ket
8 End
------------------------------------------------------------------
/[\xaa]/
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 \x{aa}
5 5 Ket
8 End
------------------------------------------------------------------
/[\xaa]/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 \x{aa}
6 6 Ket
9 End
------------------------------------------------------------------
/[^a]/
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 [^a]
5 5 Ket
8 End
------------------------------------------------------------------
/[^a]/utf
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 [^a]
5 5 Ket
8 End
------------------------------------------------------------------
/[^\xaa]/
Memory allocation (code space): 9
------------------------------------------------------------------
0 5 Bra
3 [^\x{aa}]
5 5 Ket
8 End
------------------------------------------------------------------
/[^\xaa]/utf
Memory allocation (code space): 10
------------------------------------------------------------------
0 6 Bra
3 [^\x{aa}]
6 6 Ket
9 End
------------------------------------------------------------------
#pattern -memory
/[^\d]/utf,ucp
------------------------------------------------------------------
0 11 Bra
3 [^\p{Nd}]
11 11 Ket
14 End
------------------------------------------------------------------
/[[:^alpha:][:^cntrl:]]+/utf,ucp
------------------------------------------------------------------
0 44 Bra
3 [ -~\x80-\xff\P{L}]++
44 44 Ket
47 End
------------------------------------------------------------------
/[[:^cntrl:][:^alpha:]]+/utf,ucp
------------------------------------------------------------------
0 44 Bra
3 [ -~\x80-\xff\P{L}]++
44 44 Ket
47 End
------------------------------------------------------------------
/[[:alpha:]]+/utf,ucp
------------------------------------------------------------------
0 12 Bra
3 [\p{L}]++
12 12 Ket
15 End
------------------------------------------------------------------
/[[:^alpha:]\S]+/utf,ucp
------------------------------------------------------------------
0 15 Bra
3 [\P{L}\P{Xsp}]++
15 15 Ket
18 End
------------------------------------------------------------------
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
------------------------------------------------------------------
0 73 Bra
3 abc
9 7 CBra 1
14 d
16 5 Alt
19 e
21 12 Ket
24 *THEN
25 x
27 14 CBra 2
32 123
38 *THEN
39 4
41 29 Alt
44 567
50 7 CBra 3
55 b
57 5 Alt
60 q
62 12 Ket
65 *THEN
66 xx
70 43 Ket
73 73 Ket
76 End
------------------------------------------------------------------
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
0 57 Bra
3 Brazero
4 48 SCBra 1
9 40 Once
12 18 CBra 2
17 10 CBra 3
22 a
24 \2
27 10 Ket
30 16 Alt
33 7 CBra 4
38 a*
40 7 Ket
43 33 Recurse
46 34 Ket
49 40 Ket
52 48 KetRmax
55 a?+
57 57 Ket
60 End
------------------------------------------------------------------
# End of testinput8

498
testdata/testoutput9 vendored Normal file
View File

@ -0,0 +1,498 @@
# This set of tests is run only with the 8-bit library. They do not require
# UTF-8 or Unicode property support. The file starts with all the tests of
# the POSIX interface, because that is supported only with the 8-bit library.
#forbid_utf
#pattern posix
/abc/
abc
0: abc
*** Failers
No match: POSIX code 17: match failed
/^abc|def/
abcdef
0: abc
abcdef\=notbol
0: def
/.*((abc)$|(def))/
defabc
0: defabc
1: abc
2: abc
defabc\=noteol
0: def
1: def
3: def
/the quick brown fox/
the quick brown fox
0: the quick brown fox
*** Failers
No match: POSIX code 17: match failed
The Quick Brown Fox
No match: POSIX code 17: match failed
/the quick brown fox/i
the quick brown fox
0: the quick brown fox
The Quick Brown Fox
0: The Quick Brown Fox
/abc.def/
*** Failers
No match: POSIX code 17: match failed
abc\ndef
No match: POSIX code 17: match failed
/abc$/
abc
0: abc
abc\n
0: abc
/(abc)\2/
Failed: POSIX code 15: bad back reference at offset 7
/(abc\1)/
abc
No match: POSIX code 17: match failed
/a*(b+)(z)(z)/
aaaabbbbzzzz
0: aaaabbbbzz
1: bbbb
2: z
3: z
aaaabbbbzzzz\=ovector=0
Matched without capture
aaaabbbbzzzz\=ovector=1
0: aaaabbbbzz
aaaabbbbzzzz\=ovector=2
0: aaaabbbbzz
1: bbbb
/ab.cd/
ab-cd
0: ab-cd
ab=cd
0: ab=cd
** Failers
No match: POSIX code 17: match failed
ab\ncd
No match: POSIX code 17: match failed
/ab.cd/s
ab-cd
0: ab-cd
ab=cd
0: ab=cd
ab\ncd
0: ab\x0acd
/a(b)c/no_auto_capture
abc
Matched with REG_NOSUB
/a(?P<name>b)c/no_auto_capture
abc
Matched with REG_NOSUB
/a?|b?/
abc
0: a
** Failers
0:
ddd\=notempty
No match: POSIX code 17: match failed
/\w+A/
CDAAAAB
0: CDAAAA
/\w+A/ungreedy
CDAAAAB
0: CDA
/\Biss\B/I,aftertext
** Ignored with POSIX interface: info
Mississippi
0: iss
0+ issippi
/abc/\
Failed: POSIX code 9: bad escape sequence at offset 4
#pattern -posix
# End of POSIX tests
/a\Cb/
aXb
0: aXb
a\nb
0: a\x0ab
** Failers (too big char)
No match
A\x{123}B
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
** Truncation will probably give the wrong result.
No match
A\o{443}B
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
** Truncation will probably give the wrong result.
No match
/\x{100}/I
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
/\o{400}/I
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
/ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional leading comment
(?: (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
\(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) | # comments, or...
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
# quoted strings
)*
< (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # leading <
(?: @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* , (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
)* # further okay, if led by comma
: # closing colon
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* )? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address spec
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* > # trailing >
# name and address
) (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
Capturing subpattern count = 0
Contains explicit CR or LF match
Options: extended
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
No last code unit
Subject length lower bound = 3
/\h/I
Capturing subpattern count = 0
No options
Starting code units: \x09 \x20 \xa0
No last code unit
Subject length lower bound = 1
/\H/I
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
/\v/I
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85
No last code unit
Subject length lower bound = 1
/\V/I
Capturing subpattern count = 0
No options
No first code unit
No last code unit
Subject length lower bound = 1
/\R/I
Capturing subpattern count = 0
No options
Starting code units: \x0a \x0b \x0c \x0d \x85
No last code unit
Subject length lower bound = 1
/[\h]/B
------------------------------------------------------------------
Bra
[\x09 \xa0]
Ket
End
------------------------------------------------------------------
>\x09<
0: \x09
/[\h]+/B
------------------------------------------------------------------
Bra
[\x09 \xa0]++
Ket
End
------------------------------------------------------------------
>\x09\x20\xa0<
0: \x09 \xa0
/[\v]/B
------------------------------------------------------------------
Bra
[\x0a-\x0d\x85]
Ket
End
------------------------------------------------------------------
/[\H]/B
------------------------------------------------------------------
Bra
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
Ket
End
------------------------------------------------------------------
/[^\h]/B
------------------------------------------------------------------
Bra
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
Ket
End
------------------------------------------------------------------
/[\V]/B
------------------------------------------------------------------
Bra
[\x00-\x09\x0e-\x84\x86-\xff]
Ket
End
------------------------------------------------------------------
/[\x0a\V]/B
------------------------------------------------------------------
Bra
[\x00-\x0a\x0e-\x84\x86-\xff]
Ket
End
------------------------------------------------------------------
/\777/I
Failed: error 151 at offset 3: octal value is greater than \377 in 8-bit non-UTF-8 mode
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
XX
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
XX
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
Failed: error 177 at offset 5: character code point value in \u.... sequence is too large
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
Failed: error 177 at offset 6: character code point value in \u.... sequence is too large
/[^\x00-a]{12,}[^b-\xff]*/B
------------------------------------------------------------------
Bra
[b-\xff] (neg){12,}+
[\x00-a] (neg)*+
Ket
End
------------------------------------------------------------------
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
------------------------------------------------------------------
Bra
[\x00-\x08\x0e-\x1f!-\xff] (neg)*+
\s*
[0-9A-Z_a-z]++
\W+
[\x00-/:-\xff] (neg)*+
\d
0
[\x00-/:-@[-^`{-\xff] (neg){4,6}+
\w*
A
Ket
End
------------------------------------------------------------------
# End of testinput9