All tests except JIT and save/reload are implemented.
This commit is contained in:
parent
e2076960d4
commit
e022475d54
481
RunTest
481
RunTest
|
@ -58,22 +58,18 @@ title5B=" and UCP support"
|
|||
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
|
||||
title7A="Test 7: DFA matching with UTF"
|
||||
title7B=" and Unicode property support"
|
||||
#title11="Test 11: Internal offsets and code size tests"
|
||||
title8="Test 8: Internal offsets and code size tests"
|
||||
title9="Test 9: Specials for the basic 8-bit library"
|
||||
title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
|
||||
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
|
||||
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
|
||||
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
|
||||
|
||||
#title12="Test 12: JIT-specific features (when JIT is available)"
|
||||
#title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||
#title14="Test 14: Specials for the basic 8-bit library"
|
||||
#title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
||||
#title16="Test 16: Specials for the 8-bit library with Unicode property support"
|
||||
#title17="Test 17: Specials for the basic 16/32-bit library"
|
||||
#title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
|
||||
#title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
|
||||
#title20="Test 20: DFA specials for the basic 16/32-bit library"
|
||||
|
||||
#title21="Test 21: Reloads for the basic 16/32-bit library"
|
||||
#title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
|
||||
#title23="Test 23: Specials for the 16-bit library"
|
||||
#title24="Test 24: Specials for the 16-bit library with UTF-16 support"
|
||||
#title25="Test 25: Specials for the 32-bit library"
|
||||
#title26="Test 26: Specials for the 32-bit library with UTF-32 support"
|
||||
|
||||
maxtest=2
|
||||
|
||||
|
@ -85,12 +81,12 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
|||
echo $title5A $title5B
|
||||
echo $title6
|
||||
echo $title7A $title7B
|
||||
# echo $title8
|
||||
# echo $title9
|
||||
# echo $title10
|
||||
# echo $title11
|
||||
# echo $title12
|
||||
# echo $title13
|
||||
echo $title8
|
||||
echo $title9
|
||||
echo $title10
|
||||
echo $title11
|
||||
echo $title12
|
||||
echo $title13
|
||||
# echo $title14
|
||||
# echo $title15
|
||||
# echo $title16
|
||||
|
@ -100,10 +96,6 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
|||
# echo $title20
|
||||
# echo $title21
|
||||
# echo $title22
|
||||
# echo $title23
|
||||
# echo $title24
|
||||
# echo $title25
|
||||
# echo $title26
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
@ -178,12 +170,12 @@ do4=no
|
|||
do5=no
|
||||
do6=no
|
||||
do7=no
|
||||
#do8=no
|
||||
#do9=no
|
||||
#do10=no
|
||||
#do11=no
|
||||
#do12=no
|
||||
#do13=no
|
||||
do8=no
|
||||
do9=no
|
||||
do10=no
|
||||
do11=no
|
||||
do12=no
|
||||
do13=no
|
||||
#do14=no
|
||||
#do15=no
|
||||
#do16=no
|
||||
|
@ -193,10 +185,6 @@ do7=no
|
|||
#do20=no
|
||||
#do21=no
|
||||
#do22=no
|
||||
#do23=no
|
||||
#do24=no
|
||||
#do25=no
|
||||
#do26=no
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
|
@ -207,12 +195,12 @@ while [ $# -gt 0 ] ; do
|
|||
5) do5=yes;;
|
||||
6) do6=yes;;
|
||||
7) do7=yes;;
|
||||
# 8) do8=yes;;
|
||||
# 9) do9=yes;;
|
||||
# 10) do10=yes;;
|
||||
# 11) do11=yes;;
|
||||
# 12) do12=yes;;
|
||||
# 13) do13=yes;;
|
||||
8) do8=yes;;
|
||||
9) do9=yes;;
|
||||
10) do10=yes;;
|
||||
11) do11=yes;;
|
||||
12) do12=yes;;
|
||||
13) do13=yes;;
|
||||
# 14) do14=yes;;
|
||||
# 15) do15=yes;;
|
||||
# 16) do16=yes;;
|
||||
|
@ -222,10 +210,6 @@ while [ $# -gt 0 ] ; do
|
|||
# 20) do20=yes;;
|
||||
# 21) do21=yes;;
|
||||
# 22) do22=yes;;
|
||||
# 23) do23=yes;;
|
||||
# 24) do24=yes;;
|
||||
# 25) do25=yes;;
|
||||
# 26) do26=yes;;
|
||||
-8) arg8=yes;;
|
||||
-16) arg16=yes;;
|
||||
-32) arg32=yes;;
|
||||
|
@ -330,7 +314,7 @@ else
|
|||
fi
|
||||
|
||||
# UTF support always applies to all bit sizes if both are supported; we can't
|
||||
# have UTF-8 support without UTF-16 support (for example).
|
||||
# have UTF-8 support without UTF-16 or UTF-32 support.
|
||||
|
||||
$sim ./pcre2test -C utf >/dev/null
|
||||
utf=$?
|
||||
|
@ -346,14 +330,13 @@ fi
|
|||
# relevant will be automatically skipped.
|
||||
|
||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||
$do5 = no -a $do6 = no -a $do7 = no \
|
||||
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
||||
$do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
|
||||
$do13 = no \
|
||||
]; then
|
||||
# -a $do8 = no -a \
|
||||
# $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
|
||||
# $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
|
||||
# -a $do14 = no -a $do15 = no -a $do16 = no -a \
|
||||
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
|
||||
# $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
|
||||
# $do25 = no -a $do26 = no
|
||||
# $do21 = no -a $do22 = no
|
||||
|
||||
do1=yes
|
||||
do2=yes
|
||||
|
@ -362,12 +345,12 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
|||
do5=yes
|
||||
do6=yes
|
||||
do7=yes
|
||||
# do8=yes
|
||||
# do9=yes
|
||||
# do10=yes
|
||||
# do11=yes
|
||||
# do12=yes
|
||||
# do13=yes
|
||||
do8=yes
|
||||
do9=yes
|
||||
do10=yes
|
||||
do11=yes
|
||||
do12=yes
|
||||
do13=yes
|
||||
# do14=yes
|
||||
# do15=yes
|
||||
# do16=yes
|
||||
|
@ -377,10 +360,6 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
|||
# do20=yes
|
||||
# do21=yes
|
||||
# do22=yes
|
||||
# do23=yes
|
||||
# do24=yes
|
||||
# do25=yes
|
||||
# do26=yes
|
||||
fi
|
||||
|
||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||
|
@ -584,32 +563,137 @@ if [ $do7 = yes ] ; then
|
|||
fi
|
||||
fi
|
||||
|
||||
## Test of internal offsets and code sizes. This test is run only when there
|
||||
## is Unicode property support and the link size is 2. The actual tests are
|
||||
## mostly the same as in some of the above, but in this test we inspect some
|
||||
## offsets and sizes that require a known link size. This is a doublecheck for
|
||||
## the maintainer, just in case something changes unexpectedly. The output from
|
||||
## this test is not the same in 8-bit and 16-bit modes.
|
||||
#
|
||||
#if [ $do11 = yes ] ; then
|
||||
# echo $title11
|
||||
# if [ $link_size -ne 2 ] ; then
|
||||
# echo " Skipped because link size is not 2"
|
||||
# elif [ $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput11-$bits testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
# Test of internal offsets and code sizes. This test is run only when there
|
||||
# is UTF/UCP support and the link size is 2. The actual tests are
|
||||
# mostly the same as in some of the above, but in this test we inspect some
|
||||
# offsets and sizes that require a known link size. This is a doublecheck for
|
||||
# the maintainer, just in case something changes unexpectedly. The output from
|
||||
# this test is different in 8-bit, 16-bit, and 32-bit modes, so there are
|
||||
# mode-specific output files.
|
||||
|
||||
if [ $do8 = yes ] ; then
|
||||
echo $title8
|
||||
if [ $link_size -ne 2 ] ; then
|
||||
echo " Skipped because link size is not 2"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput8-$bits testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo " OK"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 8-bit-specific features
|
||||
|
||||
if [ "$do9" = yes ] ; then
|
||||
echo $title9
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput9 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||
else echo " OK"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for UTF-8 and UCP 8-bit-specific features
|
||||
|
||||
if [ "$do10" = yes ] ; then
|
||||
echo $title10
|
||||
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
echo " Skipped when running 16/32-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput10 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||
else echo " OK"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16-bit and 32-bit features. Output is different for the two widths.
|
||||
|
||||
if [ $do11 = yes ] ; then
|
||||
echo $title11
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput11-$bits testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||
else echo " OK"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output
|
||||
# is different for the two widths.
|
||||
|
||||
if [ $do12 = yes ] ; then
|
||||
echo $title12
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
elif [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput12-$bits testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||
else echo " OK"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for 16/32-bit-specific features in DFA non-UTF modes
|
||||
|
||||
if [ $do13 = yes ] ; then
|
||||
echo $title13
|
||||
if [ "$bits" = "8" ] ; then
|
||||
echo " Skipped when running 8-bit tests"
|
||||
else
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput13 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo " OK"
|
||||
fi
|
||||
fi
|
||||
|
||||
## Test JIT-specific features when JIT is available
|
||||
#
|
||||
#if [ $do12 = yes ] ; then
|
||||
|
@ -644,169 +728,6 @@ fi
|
|||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 8-bit-specific features
|
||||
#
|
||||
#if [ "$do14" = yes ] ; then
|
||||
# echo $title14
|
||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 16/32-bit tests"
|
||||
# else
|
||||
# cp -f $testdata/saved16 testsaved16
|
||||
# cp -f $testdata/saved32 testsaved32
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput14 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput14 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 8-bit-specific features (needs UTF-8 support)
|
||||
#
|
||||
#if [ "$do15" = yes ] ; then
|
||||
# echo $title15
|
||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 16/32-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput15 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput15 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 8-bit-specific features (Unicode property support)
|
||||
#
|
||||
#if [ $do16 = yes ] ; then
|
||||
# echo $title16
|
||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 16/32-bit tests"
|
||||
# elif [ $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput16 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput16 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features
|
||||
#
|
||||
#if [ $do17 = yes ] ; then
|
||||
# echo $title17
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput17 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput17 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features (UTF-16/32 support)
|
||||
#
|
||||
#if [ $do18 = yes ] ; then
|
||||
# echo $title18
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput18 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput18-$bits testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features (Unicode property support)
|
||||
#
|
||||
#if [ $do19 = yes ] ; then
|
||||
# echo $title19
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# elif [ $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput19 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput19 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
|
||||
#
|
||||
#if [ $do20 = yes ] ; then
|
||||
# echo $title20
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput20 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput20 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for reloads with 16/32-bit library
|
||||
#
|
||||
#if [ $do21 = yes ] ; then
|
||||
|
@ -855,70 +776,6 @@ fi
|
|||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do23 = yes ] ; then
|
||||
# echo $title23
|
||||
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 8/32-bit tests"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput23 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput23 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do24 = yes ] ; then
|
||||
# echo $title24
|
||||
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 8/32-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput24 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput24 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do25 = yes ] ; then
|
||||
# echo $title25
|
||||
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
|
||||
# echo " Skipped when running 8/16-bit tests"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput25 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput25 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do26 = yes ] ; then
|
||||
# echo $title26
|
||||
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
|
||||
# echo " Skipped when running 8/16-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput26 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput26 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
|
||||
# End of loop for 8/16/32-bit tests
|
||||
done
|
||||
|
|
|
@ -286,9 +286,10 @@ This is a pattern line whose modifier list starts with two one-letter modifiers
|
|||
.SH "PATTERN SYNTAX"
|
||||
.rs
|
||||
.sp
|
||||
A pattern line must start with one of the following characters:
|
||||
A pattern line must start with one of the following characters (common symbols,
|
||||
excluding pattern meta-characters):
|
||||
.sp
|
||||
" / ! ' ` - + = : ; . ,
|
||||
/ ! " ' ` - = _ : ; , % & @ ~
|
||||
.sp
|
||||
This is interpreted as the pattern's delimiter. A regular expression may be
|
||||
continued over several input lines, in which case the newline characters are
|
||||
|
|
|
@ -7833,11 +7833,12 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
|
|||
|
||||
/* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
|
||||
8-bit UTF mode, codepoints in the range 128-255 are introductory code
|
||||
points and cannot have another case. In 16-bit and 32-bit mode, we can
|
||||
points and cannot have another case. In 16-bit and 32-bit modes, we can
|
||||
check wide characters when UTF (and therefore UCP) is supported. */
|
||||
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (UCD_OTHERCASE(firstcu) != firstcu)
|
||||
else if (firstcu <= MAX_UTF_CODE_POINT &&
|
||||
UCD_OTHERCASE(firstcu) != firstcu)
|
||||
re->flags |= PCRE2_FIRSTCASELESS;
|
||||
#endif
|
||||
}
|
||||
|
@ -7870,7 +7871,7 @@ if (reqcuflags >= 0 &&
|
|||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||
}
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (UCD_OTHERCASE(reqcu) != reqcu)
|
||||
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -184,8 +184,8 @@ static const char match_error_texts[] =
|
|||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||
/* 15 */
|
||||
"UTF-8 error: code point > 0x10ffff is not defined\0"
|
||||
"UTF-8 error: code points 0xd000-0xdfff are not defined\0"
|
||||
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
||||
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-8 error: overlong 2-byte sequence\0"
|
||||
"UTF-8 error: overlong 3-byte sequence\0"
|
||||
"UTF-8 error: overlong 4-byte sequence\0"
|
||||
|
@ -198,8 +198,8 @@ static const char match_error_texts[] =
|
|||
/* 25 */
|
||||
"UTF-16 error: invalid low surrogate\0"
|
||||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-32 error: surrogate character not allowed\0"
|
||||
"UTF-32 error: code point > 0x10ffff is not defined\0"
|
||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||
"bad count value\0"
|
||||
/* 30 */
|
||||
"pattern compiled with other endianness\0"
|
||||
|
|
|
@ -240,6 +240,10 @@ Unicode doesn't go beyond 0x0010ffff. */
|
|||
|
||||
#define NOTACHAR 0xffffffff
|
||||
|
||||
/* This is the largest valid UTF/Unicode code point. */
|
||||
|
||||
#define MAX_UTF_CODE_POINT 0x10ffff
|
||||
|
||||
/* Compile-time errors are added to this value. As they are documented, it
|
||||
should probably never be changed. */
|
||||
|
||||
|
@ -574,9 +578,6 @@ total length. */
|
|||
#define tables_length (ctypes_offset + 256)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* -------------------- Character and string names ------------------------ */
|
||||
|
||||
/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
||||
|
|
|
@ -279,9 +279,8 @@ static void
|
|||
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
PCRE2_SPTR codestart, nametable, code;
|
||||
uint32_t options = re->compile_options;
|
||||
uint32_t nesize = re->name_entry_size;
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
|
||||
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
||||
code = codestart = nametable + re->name_count * re->name_entry_size;
|
||||
|
|
|
@ -125,7 +125,7 @@ PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
|||
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
||||
|
|
|
@ -147,8 +147,6 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
|||
const char *message, *addmessage;
|
||||
size_t length, addlength;
|
||||
|
||||
errcode -= COMPILE_ERROR_BASE;
|
||||
|
||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
length = strlen(message) + 1;
|
||||
|
@ -237,8 +235,8 @@ if (preg->re_pcre2_code == NULL)
|
|||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||
preg->re_nsub = (size_t)re_nsub;
|
||||
preg->re_match_data = ((cflags & REG_NOSUB) != 0)? NULL :
|
||||
pcre2_match_data_create(re_nsub + 1, NULL);
|
||||
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
|
||||
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -497,7 +497,7 @@ static modstruct modlist[] = {
|
|||
#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
|
||||
PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
|
||||
|
||||
#define POSIX_SUPPORTED_MATCH_CONTROLS ( 0 )
|
||||
#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
|
||||
|
||||
/* Table of single-character abbreviated modifiers. The index field is
|
||||
initialized to -1, but the first time the modifier is encountered, it is filled
|
||||
|
@ -2884,7 +2884,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
|||
((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
||||
((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
||||
((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
||||
((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
|
||||
((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
|
||||
((options & PCRE2_UTF) != 0)? " utf" : "",
|
||||
((options & PCRE2_UCP) != 0)? " ucp" : "",
|
||||
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||
|
@ -3884,7 +3884,7 @@ static int
|
|||
callout_function(pcre2_callout_block_8 *cb)
|
||||
{
|
||||
uint32_t i, pre_start, post_start, subject_length;
|
||||
BOOL utf = (FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
|
||||
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
|
||||
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
|
||||
FILE *f = (first_callout || callout_capture)? outfile : NULL;
|
||||
|
||||
|
@ -4033,8 +4033,10 @@ dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
|
|||
|
||||
/* Initialize for scanning the data line. */
|
||||
|
||||
utf = (pat_patctl.control & CTL_POSIX) == 0 &&
|
||||
(FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
|
||||
utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
|
||||
((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
|
||||
FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
|
||||
|
||||
start_rep = NULL;
|
||||
len = strlen((const char *)buffer);
|
||||
while (len > 0 && isspace(buffer[len-1])) len--;
|
||||
|
@ -4043,7 +4045,7 @@ p = buffer;
|
|||
while (isspace(*p)) p++;
|
||||
|
||||
/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
|
||||
invalid input to pcre2_exec, you must use \x?? or \x{} sequences. */
|
||||
invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
|
||||
|
||||
if (utf)
|
||||
{
|
||||
|
@ -4414,14 +4416,14 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
|||
{
|
||||
fprintf(outfile, "%2d: ", (int)i);
|
||||
PCHARSV(dbuffer, pmatch[i].rm_so,
|
||||
pmatch[i].rm_eo - pmatch[i].rm_so, FALSE, outfile);
|
||||
pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
|
||||
(dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
|
||||
{
|
||||
fprintf(outfile, "%2d+ ", (int)i);
|
||||
PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
|
||||
FALSE, outfile);
|
||||
utf, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
}
|
||||
|
@ -5587,7 +5589,7 @@ while (notdone)
|
|||
rc = process_command();
|
||||
}
|
||||
|
||||
else if (strchr("\"/!'`-+=:;.,", *p) != NULL)
|
||||
else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
|
||||
{
|
||||
rc = process_pattern();
|
||||
dfa_matched = 0;
|
||||
|
|
|
@ -0,0 +1,398 @@
|
|||
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||
# relevance only for the 8-bit library.
|
||||
|
||||
/X(\C{3})/utf
|
||||
X\x{1234}
|
||||
|
||||
/X(\C{4})/utf
|
||||
X\x{1234}YZ
|
||||
|
||||
/X\C*/utf
|
||||
XYZabcdce
|
||||
|
||||
/X\C*?/utf
|
||||
XYZabcde
|
||||
|
||||
/X\C{3,5}/utf
|
||||
Xabcdefg
|
||||
X\x{1234}
|
||||
X\x{1234}YZ
|
||||
X\x{1234}\x{512}
|
||||
X\x{1234}\x{512}YZ
|
||||
|
||||
/X\C{3,5}?/utf
|
||||
Xabcdefg
|
||||
X\x{1234}
|
||||
X\x{1234}YZ
|
||||
X\x{1234}\x{512}
|
||||
|
||||
/a\Cb/utf
|
||||
aXb
|
||||
a\nb
|
||||
|
||||
/a\C\Cb/utf
|
||||
a\x{100}b
|
||||
|
||||
/ab\Cde/utf
|
||||
abXde
|
||||
|
||||
/a\C\Cb/utf
|
||||
a\x{100}b
|
||||
** Failers
|
||||
a\x{12257}b
|
||||
|
||||
/[Ã]/utf
|
||||
|
||||
/Ã/utf
|
||||
|
||||
/ÃÃÃxxx/utf
|
||||
|
||||
/badutf/utf
|
||||
\xdf
|
||||
\xef
|
||||
\xef\x80
|
||||
\xf7
|
||||
\xf7\x80
|
||||
\xf7\x80\x80
|
||||
\xfb
|
||||
\xfb\x80
|
||||
\xfb\x80\x80
|
||||
\xfb\x80\x80\x80
|
||||
\xfd
|
||||
\xfd\x80
|
||||
\xfd\x80\x80
|
||||
\xfd\x80\x80\x80
|
||||
\xfd\x80\x80\x80\x80
|
||||
\xdf\x7f
|
||||
\xef\x7f\x80
|
||||
\xef\x80\x7f
|
||||
\xf7\x7f\x80\x80
|
||||
\xf7\x80\x7f\x80
|
||||
\xf7\x80\x80\x7f
|
||||
\xfb\x7f\x80\x80\x80
|
||||
\xfb\x80\x7f\x80\x80
|
||||
\xfb\x80\x80\x7f\x80
|
||||
\xfb\x80\x80\x80\x7f
|
||||
\xfd\x7f\x80\x80\x80\x80
|
||||
\xfd\x80\x7f\x80\x80\x80
|
||||
\xfd\x80\x80\x7f\x80\x80
|
||||
\xfd\x80\x80\x80\x7f\x80
|
||||
\xfd\x80\x80\x80\x80\x7f
|
||||
\xed\xa0\x80
|
||||
\xc0\x8f
|
||||
\xe0\x80\x8f
|
||||
\xf0\x80\x80\x8f
|
||||
\xf8\x80\x80\x80\x8f
|
||||
\xfc\x80\x80\x80\x80\x8f
|
||||
\x80
|
||||
\xfe
|
||||
\xff
|
||||
|
||||
/badutf/utf
|
||||
\xfb\x80\x80\x80\x80
|
||||
\xfd\x80\x80\x80\x80\x80
|
||||
\xf7\xbf\xbf\xbf
|
||||
|
||||
/shortutf/utf
|
||||
\xdf\=ph
|
||||
\xef\=ph
|
||||
\xef\x80\=ph
|
||||
\xf7\=ph
|
||||
\xf7\x80\=ph
|
||||
\xf7\x80\x80\=ph
|
||||
\xfb\=ph
|
||||
\xfb\x80\=ph
|
||||
\xfb\x80\x80\=ph
|
||||
\xfb\x80\x80\x80\=ph
|
||||
\xfd\=ph
|
||||
\xfd\x80\=ph
|
||||
\xfd\x80\x80\=ph
|
||||
\xfd\x80\x80\x80\=ph
|
||||
\xfd\x80\x80\x80\x80\=ph
|
||||
|
||||
/anything/utf
|
||||
\xc0\x80
|
||||
\xc1\x8f
|
||||
\xe0\x9f\x80
|
||||
\xf0\x8f\x80\x80
|
||||
\xf8\x87\x80\x80\x80
|
||||
\xfc\x83\x80\x80\x80\x80
|
||||
\xfe\x80\x80\x80\x80\x80
|
||||
\xff\x80\x80\x80\x80\x80
|
||||
\xc3\x8f
|
||||
\xe0\xaf\x80
|
||||
\xe1\x80\x80
|
||||
\xf0\x9f\x80\x80
|
||||
\xf1\x8f\x80\x80
|
||||
\xf8\x88\x80\x80\x80
|
||||
\xf9\x87\x80\x80\x80
|
||||
\xfc\x84\x80\x80\x80\x80
|
||||
\xfd\x83\x80\x80\x80\x80
|
||||
\xf8\x88\x80\x80\x80\=no_utf_check
|
||||
\xf9\x87\x80\x80\x80\=no_utf_check
|
||||
\xfc\x84\x80\x80\x80\x80\=no_utf_check
|
||||
\xfd\x83\x80\x80\x80\x80\=no_utf_check
|
||||
|
||||
/\x{100}/IB,utf
|
||||
|
||||
/\x{1000}/IB,utf
|
||||
|
||||
/\x{10000}/IB,utf
|
||||
|
||||
/\x{100000}/IB,utf
|
||||
|
||||
/\x{10ffff}/IB,utf
|
||||
|
||||
/[\x{ff}]/IB,utf
|
||||
|
||||
/[\x{100}]/IB,utf
|
||||
|
||||
/\x80/IB,utf
|
||||
|
||||
/\xff/IB,utf
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
|
||||
\x{D55c}\x{ad6d}\x{C5B4}
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
|
||||
\x{65e5}\x{672c}\x{8a9e}
|
||||
|
||||
/\x{80}/IB,utf
|
||||
|
||||
/\x{084}/IB,utf
|
||||
|
||||
/\x{104}/IB,utf
|
||||
|
||||
/\x{861}/IB,utf
|
||||
|
||||
/\x{212ab}/IB,utf
|
||||
|
||||
# This one is here not because it's different to Perl, but because of the way
|
||||
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||||
# can't tell the difference.)
|
||||
|
||||
/X(\C)(.*)/utf
|
||||
X\x{1234}
|
||||
X\nabc
|
||||
|
||||
# This one is here because Perl gives out a grumbly error message (quite
|
||||
# correctly, but that messes up comparisons).
|
||||
|
||||
/a\Cb/utf
|
||||
*** Failers
|
||||
a\x{100}b
|
||||
|
||||
/[^ab\xC0-\xF0]/IB,utf
|
||||
\x{f1}
|
||||
\x{bf}
|
||||
\x{100}
|
||||
\x{1000}
|
||||
*** Failers
|
||||
\x{c0}
|
||||
\x{f0}
|
||||
|
||||
/Ä€{3,4}/IB,utf
|
||||
\x{100}\x{100}\x{100}\x{100\x{100}
|
||||
|
||||
/(\x{100}+|x)/IB,utf
|
||||
|
||||
/(\x{100}*a|x)/IB,utf
|
||||
|
||||
/(\x{100}{0,2}a|x)/IB,utf
|
||||
|
||||
/(\x{100}{1,2}a|x)/IB,utf
|
||||
|
||||
/\x{100}/IB,utf
|
||||
|
||||
/a\x{100}\x{101}*/IB,utf
|
||||
|
||||
/a\x{100}\x{101}+/IB,utf
|
||||
|
||||
/[^\x{c4}]/IB
|
||||
|
||||
/[\x{100}]/IB,utf
|
||||
\x{100}
|
||||
Z\x{100}
|
||||
\x{100}Z
|
||||
*** Failers
|
||||
|
||||
/[\xff]/IB,utf
|
||||
>\x{ff}<
|
||||
|
||||
/[^\xff]/IB,utf
|
||||
|
||||
/\x{100}abc(xyz(?1))/IB,utf
|
||||
|
||||
/a\x{1234}b/utf,posix
|
||||
a\x{1234}b
|
||||
|
||||
/\777/I,utf
|
||||
\x{1ff}
|
||||
\777
|
||||
|
||||
/\x{100}+\x{200}/IB,utf
|
||||
|
||||
/\x{100}+X/IB,utf
|
||||
|
||||
/^[\QÄ€\E-\QÅ<51>\E/B,utf
|
||||
|
||||
# This tests the stricter UTF-8 check according to RFC 3629.
|
||||
|
||||
/X/utf
|
||||
\x{d800}
|
||||
\x{d800}\=no_utf_check
|
||||
\x{da00}
|
||||
\x{da00}\=no_utf_check
|
||||
\x{dfff}
|
||||
\x{dfff}\=no_utf_check
|
||||
\x{110000}
|
||||
\x{110000}\=no_utf_check
|
||||
\x{2000000}
|
||||
\x{2000000}\=no_utf_check
|
||||
\x{7fffffff}
|
||||
\x{7fffffff}\=no_utf_check
|
||||
|
||||
/(*UTF8)\x{1234}/
|
||||
abcd\x{1234}pqr
|
||||
|
||||
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
|
||||
|
||||
/\h/I,utf
|
||||
ABC\x{09}
|
||||
ABC\x{20}
|
||||
ABC\x{a0}
|
||||
ABC\x{1680}
|
||||
ABC\x{180e}
|
||||
ABC\x{2000}
|
||||
ABC\x{202f}
|
||||
ABC\x{205f}
|
||||
ABC\x{3000}
|
||||
|
||||
/\v/I,utf
|
||||
ABC\x{0a}
|
||||
ABC\x{0b}
|
||||
ABC\x{0c}
|
||||
ABC\x{0d}
|
||||
ABC\x{85}
|
||||
ABC\x{2028}
|
||||
|
||||
/\h*A/I,utf
|
||||
CDBABC
|
||||
|
||||
/\v+A/I,utf
|
||||
|
||||
/\s?xxx\s/I,utf
|
||||
|
||||
/\sxxx\s/I,utf,tables=2
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
AB\x{a0}xxx\x{85}XYZ
|
||||
|
||||
/\S \S/I,utf,tables=2
|
||||
\x{a2} \x{84}
|
||||
A Z
|
||||
|
||||
/a+/utf
|
||||
a\x{123}aa\=offset=1
|
||||
a\x{123}aa\=offset=2
|
||||
a\x{123}aa\=offset=3
|
||||
a\x{123}aa\=offset=4
|
||||
a\x{123}aa\=offset=5
|
||||
a\x{123}aa\=offset=6
|
||||
|
||||
/\x{1234}+/Ii,utf
|
||||
|
||||
/\x{1234}+?/Ii,utf
|
||||
|
||||
/\x{1234}++/Ii,utf
|
||||
|
||||
/\x{1234}{2}/Ii,utf
|
||||
|
||||
/[^\x{c4}]/IB,utf
|
||||
|
||||
/X+\x{200}/IB,utf
|
||||
|
||||
/\R/I,utf
|
||||
|
||||
/\777/IB,utf
|
||||
|
||||
/\w+\x{C4}/B,utf
|
||||
a\x{C4}\x{C4}
|
||||
|
||||
/\w+\x{C4}/B,utf,tables=2
|
||||
a\x{C4}\x{C4}
|
||||
|
||||
/\W+\x{C4}/B,utf
|
||||
!\x{C4}
|
||||
|
||||
/\W+\x{C4}/B,utf,tables=2
|
||||
!\x{C4}
|
||||
|
||||
/\W+\x{A1}/B,utf
|
||||
!\x{A1}
|
||||
|
||||
/\W+\x{A1}/B,utf,tables=2
|
||||
!\x{A1}
|
||||
|
||||
/X\s+\x{A0}/B,utf
|
||||
X\x20\x{A0}\x{A0}
|
||||
|
||||
/X\s+\x{A0}/B,utf,tables=2
|
||||
X\x20\x{A0}\x{A0}
|
||||
|
||||
/\S+\x{A0}/B,utf
|
||||
X\x{A0}\x{A0}
|
||||
|
||||
/\S+\x{A0}/B,utf,tables=2
|
||||
X\x{A0}\x{A0}
|
||||
|
||||
/\x{a0}+\s!/B,utf
|
||||
\x{a0}\x20!
|
||||
|
||||
/\x{a0}+\s!/B,utf,tables=2
|
||||
\x{a0}\x20!
|
||||
|
||||
/A/utf
|
||||
\x{ff000041}
|
||||
\x{7f000041}
|
||||
|
||||
/(*UTF8)abc/never_utf
|
||||
|
||||
/abc/utf,never_utf
|
||||
|
||||
/\w/posix
|
||||
+++\x{c2}
|
||||
|
||||
/\w/ucp,posix
|
||||
+++\x{c2}
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
|
||||
|
||||
/AB\x{1fb0}/IB,utf
|
||||
|
||||
/AB\x{1fb0}/IBi,utf
|
||||
|
||||
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
|
||||
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
|
||||
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
|
||||
|
||||
/[â±¥]/Bi,utf
|
||||
|
||||
/[^â±¥]/Bi,utf
|
||||
|
||||
/\h/I
|
||||
|
||||
/\v/I
|
||||
|
||||
/\R/I
|
||||
|
||||
/[[:blank:]]/B,ucp
|
||||
|
||||
/\x{212a}+/Ii,utf
|
||||
KKkk\x{212a}
|
||||
|
||||
/s+/Ii,utf
|
||||
SSss\x{17f}
|
||||
|
||||
# End of testinput10
|
|
@ -0,0 +1,357 @@
|
|||
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
||||
# features that are not compatible with the 8-bit library, or which give
|
||||
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
||||
# different, so they have separate output files.
|
||||
|
||||
#forbid_utf
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
a\nb
|
||||
|
||||
/[^\x{c4}]/IB
|
||||
|
||||
/\x{100}/I
|
||||
|
||||
/ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional leading comment
|
||||
(?: (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address
|
||||
| # or
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # one word, optionally followed by....
|
||||
(?:
|
||||
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||
\(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) | # comments, or...
|
||||
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
# quoted strings
|
||||
)*
|
||||
< (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # leading <
|
||||
(?: @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* , (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
)* # further okay, if led by comma
|
||||
: # closing colon
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* )? # optional route
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address spec
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* > # trailing >
|
||||
# name and address
|
||||
) (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional trailing comment
|
||||
/Ix
|
||||
|
||||
/[\h]/B
|
||||
>\x09<
|
||||
|
||||
/[\h]+/B
|
||||
>\x09\x20\xa0<
|
||||
|
||||
/[\v]/B
|
||||
|
||||
/[^\h]/B
|
||||
|
||||
/\h+/I
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
|
||||
/[\h\x{dc00}]+/IB
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
|
||||
/\H+/I
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/[\H\x{d800}]+/
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/\v+/I
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/[\v\x{dc00}]+/IB
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\V+/I
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
|
||||
/[\V\x{d800}]+/
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
|
||||
/\R+/I,bsr=unicode
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||||
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||
XX
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||
XX
|
||||
|
||||
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
|
||||
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
|
||||
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
|
||||
/^\x{ffff}+/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}?/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}*/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}{3}/i
|
||||
\x{ffff}\x{ffff}\x{ffff}
|
||||
|
||||
/^\x{ffff}{0,3}/i
|
||||
\x{ffff}
|
||||
|
||||
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||
|
||||
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||
|
||||
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
||||
|
||||
/^[\x{1234}\x{4321}]{2,4}?/
|
||||
\x{1234}\x{1234}\x{1234}
|
||||
|
||||
# Check maximum non-UTF character size for the 16-bit library.
|
||||
|
||||
/\x{ffff}/
|
||||
A\x{ffff}B
|
||||
|
||||
/\x{10000}/
|
||||
|
||||
/\o{20000}/
|
||||
|
||||
# Check maximum character size for the 32-bit library. These will all give
|
||||
# errors in the 16-bit library.
|
||||
|
||||
/\x{110000}/
|
||||
|
||||
/\x{7fffffff}/
|
||||
|
||||
/\x{80000000}/
|
||||
|
||||
/\x{ffffffff}/
|
||||
|
||||
/\x{100000000}/
|
||||
|
||||
/\o{17777777777}/
|
||||
|
||||
/\o{20000000000}/
|
||||
|
||||
/\o{37777777777}/
|
||||
|
||||
/\o{40000000000}/
|
||||
|
||||
/\x{7fffffff}\x{7fffffff}/I
|
||||
|
||||
/\x{80000000}\x{80000000}/I
|
||||
|
||||
/\x{ffffffff}\x{ffffffff}/I
|
||||
|
||||
# Non-UTF characters
|
||||
|
||||
/\C{2,3}/
|
||||
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||
|
||||
/\x{400000}\x{800000}/IBi
|
||||
|
||||
# Check character ranges
|
||||
|
||||
/[\H]/IB
|
||||
|
||||
/[\V]/IB
|
||||
|
||||
# End of testinput11
|
|
@ -0,0 +1,332 @@
|
|||
# This set of tests is for UTF-16 and UTF-32 support, and is relevant only to
|
||||
# the 16-bit and 32-bit libraries. The output is different for each library,
|
||||
# so there are separate output files.
|
||||
|
||||
/ÃÃÃxxx/IB,utf,no_utf_check
|
||||
|
||||
/abc/utf
|
||||
Ã]
|
||||
|
||||
/X(\C{3})/utf
|
||||
X\x{11234}Y
|
||||
X\x{11234}YZ
|
||||
|
||||
/X(\C{4})/utf
|
||||
X\x{11234}YZ
|
||||
X\x{11234}YZW
|
||||
|
||||
/X\C*/utf
|
||||
XYZabcdce
|
||||
|
||||
/X\C*?/utf
|
||||
XYZabcde
|
||||
|
||||
/X\C{3,5}/utf
|
||||
Xabcdefg
|
||||
X\x{11234}Y
|
||||
X\x{11234}YZ
|
||||
X\x{11234}\x{512}
|
||||
X\x{11234}\x{512}YZ
|
||||
X\x{11234}\x{512}\x{11234}Z
|
||||
|
||||
/X\C{3,5}?/utf
|
||||
Xabcdefg
|
||||
X\x{11234}Y
|
||||
X\x{11234}YZ
|
||||
X\x{11234}\x{512}YZ
|
||||
*** Failers
|
||||
X\x{11234}
|
||||
|
||||
/a\Cb/utf
|
||||
aXb
|
||||
a\nb
|
||||
|
||||
/a\C\Cb/utf
|
||||
a\x{12257}b
|
||||
a\x{12257}\x{11234}b
|
||||
** Failers
|
||||
a\x{100}b
|
||||
|
||||
/ab\Cde/utf
|
||||
abXde
|
||||
|
||||
# Check maximum character size
|
||||
|
||||
/\x{ffff}/IB,utf
|
||||
|
||||
/\x{10000}/IB,utf
|
||||
|
||||
/\x{100}/IB,utf
|
||||
|
||||
/\x{1000}/IB,utf
|
||||
|
||||
/\x{10000}/IB,utf
|
||||
|
||||
/\x{100000}/IB,utf
|
||||
|
||||
/\x{10ffff}/IB,utf
|
||||
|
||||
/[\x{ff}]/IB,utf
|
||||
|
||||
/[\x{100}]/IB,utf
|
||||
|
||||
/\x80/IB,utf
|
||||
|
||||
/\xff/IB,utf
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
|
||||
\x{D55c}\x{ad6d}\x{C5B4}
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
|
||||
\x{65e5}\x{672c}\x{8a9e}
|
||||
|
||||
/\x{80}/IB,utf
|
||||
|
||||
/\x{084}/IB,utf
|
||||
|
||||
/\x{104}/IB,utf
|
||||
|
||||
/\x{861}/IB,utf
|
||||
|
||||
/\x{212ab}/IB,utf
|
||||
|
||||
# This one is here not because it's different to Perl, but because of the way
|
||||
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||||
# can't tell the difference.)
|
||||
|
||||
/X(\C)(.*)/utf
|
||||
X\x{1234}
|
||||
X\nabc
|
||||
|
||||
# This one is here because Perl gives out a grumbly error message (quite
|
||||
# correctly, but that messes up comparisons).
|
||||
|
||||
/a\Cb/utf
|
||||
*** Failers
|
||||
a\x{100}b
|
||||
|
||||
/[^ab\xC0-\xF0]/IB,utf
|
||||
\x{f1}
|
||||
\x{bf}
|
||||
\x{100}
|
||||
\x{1000}
|
||||
*** Failers
|
||||
\x{c0}
|
||||
\x{f0}
|
||||
|
||||
/Ä€{3,4}/IB,utf
|
||||
\x{100}\x{100}\x{100}\x{100\x{100}
|
||||
|
||||
/(\x{100}+|x)/IB,utf
|
||||
|
||||
/(\x{100}*a|x)/IB,utf
|
||||
|
||||
/(\x{100}{0,2}a|x)/IB,utf
|
||||
|
||||
/(\x{100}{1,2}a|x)/IB,utf
|
||||
|
||||
/\x{100}/IB,utf
|
||||
|
||||
/a\x{100}\x{101}*/IB,utf
|
||||
|
||||
/a\x{100}\x{101}+/IB,utf
|
||||
|
||||
/[^\x{c4}]/IB
|
||||
|
||||
/[\x{100}]/IB,utf
|
||||
\x{100}
|
||||
Z\x{100}
|
||||
\x{100}Z
|
||||
*** Failers
|
||||
|
||||
/[\xff]/IB,utf
|
||||
>\x{ff}<
|
||||
|
||||
/[^\xff]/IB,utf
|
||||
|
||||
/\x{100}abc(xyz(?1))/IB,utf
|
||||
|
||||
/\777/I,utf
|
||||
\x{1ff}
|
||||
\777
|
||||
|
||||
/\x{100}+\x{200}/IB,utf
|
||||
|
||||
/\x{100}+X/IB,utf
|
||||
|
||||
/^[\QÄ€\E-\QÅ<51>\E/B,utf
|
||||
|
||||
/X/utf
|
||||
\x{d800}
|
||||
\x{d800}\=no_utf_check
|
||||
\x{da00}
|
||||
\x{da00}\=no_utf_check
|
||||
\x{dc00}
|
||||
\x{dc00}\=no_utf_check
|
||||
\x{de00}
|
||||
\x{de00}\=no_utf_check
|
||||
\x{dfff}
|
||||
\x{dfff}\=no_utf_check
|
||||
\x{110000}
|
||||
\x{d800}\x{1234}
|
||||
|
||||
/(*UTF16)\x{11234}/
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*UTF)\x{11234}/I
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*UTF-32)\x{11234}/
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
|
||||
|
||||
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
|
||||
|
||||
/\h/I,utf
|
||||
ABC\x{09}
|
||||
ABC\x{20}
|
||||
ABC\x{a0}
|
||||
ABC\x{1680}
|
||||
ABC\x{180e}
|
||||
ABC\x{2000}
|
||||
ABC\x{202f}
|
||||
ABC\x{205f}
|
||||
ABC\x{3000}
|
||||
|
||||
/\v/I,utf
|
||||
ABC\x{0a}
|
||||
ABC\x{0b}
|
||||
ABC\x{0c}
|
||||
ABC\x{0d}
|
||||
ABC\x{85}
|
||||
ABC\x{2028}
|
||||
|
||||
/\h*A/I,utf
|
||||
CDBABC
|
||||
\x{2000}ABC
|
||||
|
||||
/\R*A/I,bsr=unicode,utf
|
||||
CDBABC
|
||||
\x{2028}A
|
||||
|
||||
/\v+A/I,utf
|
||||
|
||||
/\s?xxx\s/I,utf
|
||||
|
||||
/\sxxx\s/I,utf,tables=2
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
AB\x{a0}xxx\x{85}XYZ
|
||||
|
||||
/\S \S/I,utf,tables=2
|
||||
\x{a2} \x{84}
|
||||
A Z
|
||||
|
||||
/a+/utf
|
||||
a\x{123}aa\=offset=1
|
||||
a\x{123}aa\=offset=2
|
||||
a\x{123}aa\=offset=3
|
||||
a\x{123}aa\=offset=4
|
||||
a\x{123}aa\=offset=5
|
||||
a\x{123}aa\=offset=6
|
||||
|
||||
/\x{1234}+/Ii,utf
|
||||
|
||||
/\x{1234}+?/Ii,utf
|
||||
|
||||
/\x{1234}++/Ii,utf
|
||||
|
||||
/\x{1234}{2}/Ii,utf
|
||||
|
||||
/[^\x{c4}]/IB,utf
|
||||
|
||||
/X+\x{200}/IB,utf
|
||||
|
||||
/\R/I,utf
|
||||
|
||||
# Check bad offset
|
||||
|
||||
/a/utf
|
||||
\x{10000}\=offset=1
|
||||
\x{10000}ab\=offset=1
|
||||
\x{10000}ab\=offset=2
|
||||
\x{10000}ab\=offset=3
|
||||
\x{10000}ab\=offset=4
|
||||
\x{10000}ab\=offset=5
|
||||
|
||||
/í¼€/utf
|
||||
|
||||
/\w+\x{C4}/B,utf
|
||||
a\x{C4}\x{C4}
|
||||
|
||||
/\w+\x{C4}/B,utf,tables=2
|
||||
a\x{C4}\x{C4}
|
||||
|
||||
/\W+\x{C4}/B,utf
|
||||
!\x{C4}
|
||||
|
||||
/\W+\x{C4}/B,utf,tables=2
|
||||
!\x{C4}
|
||||
|
||||
/\W+\x{A1}/B,utf
|
||||
!\x{A1}
|
||||
|
||||
/\W+\x{A1}/B,utf,tables=2
|
||||
!\x{A1}
|
||||
|
||||
/X\s+\x{A0}/B,utf
|
||||
X\x20\x{A0}\x{A0}
|
||||
|
||||
/X\s+\x{A0}/B,utf,tables=2
|
||||
X\x20\x{A0}\x{A0}
|
||||
|
||||
/\S+\x{A0}/B,utf
|
||||
X\x{A0}\x{A0}
|
||||
|
||||
/\S+\x{A0}/B,utf,tables=2
|
||||
X\x{A0}\x{A0}
|
||||
|
||||
/\x{a0}+\s!/B,utf
|
||||
\x{a0}\x20!
|
||||
|
||||
/\x{a0}+\s!/B,utf,tables=2
|
||||
\x{a0}\x20!
|
||||
|
||||
/(*UTF)abc/never_utf
|
||||
|
||||
/abc/utf,never_utf
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
|
||||
|
||||
/AB\x{1fb0}/IB,utf
|
||||
|
||||
/AB\x{1fb0}/IBi,utf
|
||||
|
||||
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
|
||||
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
|
||||
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
|
||||
|
||||
/[â±¥]/Bi,utf
|
||||
|
||||
/[^â±¥]/Bi,utf
|
||||
|
||||
/[[:blank:]]/B,ucp
|
||||
|
||||
/\x{212a}+/Ii,utf
|
||||
KKkk\x{212a}
|
||||
|
||||
/s+/Ii,utf
|
||||
SSss\x{17f}
|
||||
|
||||
# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
|
||||
|
||||
/\x{110000}/utf
|
||||
|
||||
/\o{4200000}/utf
|
||||
|
||||
/\C/utf
|
||||
\x{110000}
|
||||
|
||||
# End of testinput12
|
|
@ -0,0 +1,22 @@
|
|||
# These DFA tests are for the handling of characters greater than 255 in
|
||||
# 16-bit or 32-bit, non-UTF mode.
|
||||
|
||||
#forbid_utf
|
||||
#subject dfa
|
||||
|
||||
/^\x{ffff}+/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}?/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}*/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}{3}/i
|
||||
\x{ffff}\x{ffff}\x{ffff}
|
||||
|
||||
/^\x{ffff}{0,3}/i
|
||||
\x{ffff}
|
||||
|
||||
# End of testinput13
|
|
@ -1593,7 +1593,7 @@ a random value. /Ix
|
|||
abc\rdef
|
||||
abc\r\ndef
|
||||
|
||||
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
|
||||
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
||||
/* this is a C style comment */\=find_limits
|
||||
|
||||
/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I
|
||||
|
|
|
@ -573,11 +573,11 @@
|
|||
/X\W{3}X/utf
|
||||
X\=ps
|
||||
|
||||
/\sxxx\s/utf,tables=1
|
||||
/\sxxx\s/utf,tables=2
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
AB\x{a0}xxx\x{85}XYZ
|
||||
|
||||
/\S \S/utf,tables=1
|
||||
/\S \S/utf,tables=2
|
||||
\x{a2} \x{84}
|
||||
|
||||
'A#хц'Bx,newline=any,utf
|
||||
|
|
|
@ -0,0 +1,141 @@
|
|||
# These are a few representative patterns whose lengths and offsets are to be
|
||||
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||
# are all themselves checked in other tests. Unicode, including property
|
||||
# support, is required for these tests.
|
||||
|
||||
#pattern fullbincode,memory
|
||||
|
||||
/((?i)b)/
|
||||
|
||||
/(?s)(.*X|^B)/
|
||||
|
||||
/(?s:.*X|^B)/
|
||||
|
||||
/^[[:alnum:]]/
|
||||
|
||||
/#/Ix
|
||||
|
||||
/a#/Ix
|
||||
|
||||
/x?+/
|
||||
|
||||
/x++/
|
||||
|
||||
/x{1,3}+/
|
||||
|
||||
/(x)*+/
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||
|
||||
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
|
||||
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
|
||||
/(a(?1)b)/
|
||||
|
||||
/(a(?1)+b)/
|
||||
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||
|
||||
/abc(?C255)de(?C)f/
|
||||
|
||||
/abcde/auto_callout
|
||||
|
||||
/\x{100}/utf
|
||||
|
||||
/\x{1000}/utf
|
||||
|
||||
/\x{10000}/utf
|
||||
|
||||
/\x{100000}/utf
|
||||
|
||||
/\x{10ffff}/utf
|
||||
|
||||
/\x{110000}/utf
|
||||
|
||||
/[\x{ff}]/utf
|
||||
|
||||
/[\x{100}]/utf
|
||||
|
||||
/\x80/utf
|
||||
|
||||
/\xff/utf
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||
|
||||
/[\x{100}]/utf
|
||||
|
||||
/[Z\x{100}]/utf
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||
|
||||
/^[\QĀ\E-\QŐ\E]/utf
|
||||
|
||||
/^[\QĀ\E-\QŐ\E/utf
|
||||
|
||||
/[\p{L}]/
|
||||
|
||||
/[\p{^L}]/
|
||||
|
||||
/[\P{L}]/
|
||||
|
||||
/[\P{^L}]/
|
||||
|
||||
/[abc\p{L}\x{0660}]/utf
|
||||
|
||||
/[\p{Nd}]/utf
|
||||
|
||||
/[\p{Nd}+-]+/utf
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||
|
||||
/[\x{105}-\x{109}]/i,utf
|
||||
|
||||
/( ( (?(1)0|) )* )/x
|
||||
|
||||
/( (?(1)0|)* )/x
|
||||
|
||||
/[a]/
|
||||
|
||||
/[a]/utf
|
||||
|
||||
/[\xaa]/
|
||||
|
||||
/[\xaa]/utf
|
||||
|
||||
/[^a]/
|
||||
|
||||
/[^a]/utf
|
||||
|
||||
/[^\xaa]/
|
||||
|
||||
/[^\xaa]/utf
|
||||
|
||||
#pattern -memory
|
||||
|
||||
/[^\d]/utf,ucp
|
||||
|
||||
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||
|
||||
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||
|
||||
/[[:alpha:]]+/utf,ucp
|
||||
|
||||
/[[:^alpha:]\S]+/utf,ucp
|
||||
|
||||
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||
|
||||
/(((a\2)|(a*)\g<-1>))*a?/
|
||||
|
||||
# End of testinput8
|
|
@ -0,0 +1,333 @@
|
|||
# This set of tests is run only with the 8-bit library. They do not require
|
||||
# UTF-8 or Unicode property support. The file starts with all the tests of
|
||||
# the POSIX interface, because that is supported only with the 8-bit library.
|
||||
|
||||
#forbid_utf
|
||||
#pattern posix
|
||||
|
||||
/abc/
|
||||
abc
|
||||
*** Failers
|
||||
|
||||
/^abc|def/
|
||||
abcdef
|
||||
abcdef\=notbol
|
||||
|
||||
/.*((abc)$|(def))/
|
||||
defabc
|
||||
defabc\=noteol
|
||||
|
||||
/the quick brown fox/
|
||||
the quick brown fox
|
||||
*** Failers
|
||||
The Quick Brown Fox
|
||||
|
||||
/the quick brown fox/i
|
||||
the quick brown fox
|
||||
The Quick Brown Fox
|
||||
|
||||
/abc.def/
|
||||
*** Failers
|
||||
abc\ndef
|
||||
|
||||
/abc$/
|
||||
abc
|
||||
abc\n
|
||||
|
||||
/(abc)\2/
|
||||
|
||||
/(abc\1)/
|
||||
abc
|
||||
|
||||
/a*(b+)(z)(z)/
|
||||
aaaabbbbzzzz
|
||||
aaaabbbbzzzz\=ovector=0
|
||||
aaaabbbbzzzz\=ovector=1
|
||||
aaaabbbbzzzz\=ovector=2
|
||||
|
||||
/ab.cd/
|
||||
ab-cd
|
||||
ab=cd
|
||||
** Failers
|
||||
ab\ncd
|
||||
|
||||
/ab.cd/s
|
||||
ab-cd
|
||||
ab=cd
|
||||
ab\ncd
|
||||
|
||||
/a(b)c/no_auto_capture
|
||||
abc
|
||||
|
||||
/a(?P<name>b)c/no_auto_capture
|
||||
abc
|
||||
|
||||
/a?|b?/
|
||||
abc
|
||||
** Failers
|
||||
ddd\=notempty
|
||||
|
||||
/\w+A/
|
||||
CDAAAAB
|
||||
|
||||
/\w+A/ungreedy
|
||||
CDAAAAB
|
||||
|
||||
/\Biss\B/I,aftertext
|
||||
Mississippi
|
||||
|
||||
/abc/\
|
||||
|
||||
#pattern -posix
|
||||
|
||||
# End of POSIX tests
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
a\nb
|
||||
** Failers (too big char)
|
||||
A\x{123}B
|
||||
A\o{443}B
|
||||
|
||||
/\x{100}/I
|
||||
|
||||
/\o{400}/I
|
||||
|
||||
/ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional leading comment
|
||||
(?: (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address
|
||||
| # or
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # one word, optionally followed by....
|
||||
(?:
|
||||
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||
\(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) | # comments, or...
|
||||
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
# quoted strings
|
||||
)*
|
||||
< (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # leading <
|
||||
(?: @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* , (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
)* # further okay, if led by comma
|
||||
: # closing colon
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* )? # optional route
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address spec
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* > # trailing >
|
||||
# name and address
|
||||
) (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional trailing comment
|
||||
/Ix
|
||||
|
||||
/\h/I
|
||||
|
||||
/\H/I
|
||||
|
||||
/\v/I
|
||||
|
||||
/\V/I
|
||||
|
||||
/\R/I
|
||||
|
||||
/[\h]/B
|
||||
>\x09<
|
||||
|
||||
/[\h]+/B
|
||||
>\x09\x20\xa0<
|
||||
|
||||
/[\v]/B
|
||||
|
||||
/[\H]/B
|
||||
|
||||
/[^\h]/B
|
||||
|
||||
/[\V]/B
|
||||
|
||||
/[\x0a\V]/B
|
||||
|
||||
/\777/I
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||
XX
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||
XX
|
||||
|
||||
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||
|
||||
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||
|
||||
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||
|
||||
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||
|
||||
# End of testinput9
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,675 @@
|
|||
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
||||
# features that are not compatible with the 8-bit library, or which give
|
||||
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
||||
# different, so they have separate output files.
|
||||
|
||||
#forbid_utf
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
0: aXb
|
||||
a\nb
|
||||
0: a\x0ab
|
||||
|
||||
/[^\x{c4}]/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x{c4}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/\x{100}/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First code unit = \x{100}
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional leading comment
|
||||
(?: (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address
|
||||
| # or
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # one word, optionally followed by....
|
||||
(?:
|
||||
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||
\(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) | # comments, or...
|
||||
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
# quoted strings
|
||||
)*
|
||||
< (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # leading <
|
||||
(?: @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* , (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
)* # further okay, if led by comma
|
||||
: # closing colon
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* )? # optional route
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address spec
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* > # trailing >
|
||||
# name and address
|
||||
) (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional trailing comment
|
||||
/Ix
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
Options: extended
|
||||
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
|
||||
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
|
||||
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 3
|
||||
|
||||
/[\h]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
>\x09<
|
||||
0: \x09
|
||||
|
||||
/[\h]+/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
>\x09\x20\xa0<
|
||||
0: \x09 \xa0
|
||||
|
||||
/[\v]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x0a-\x0d\x85\x{2028}-\x{2029}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\h]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\h+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x09 \x20 \xa0 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
0: \x{200a}\xa0\x{2000}
|
||||
|
||||
/[\h\x{dc00}]+/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x09 \x20 \xa0 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
0: \x{200a}\xa0\x{2000}
|
||||
|
||||
/\H+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
0: \x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
0: \x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/[\H\x{d800}]+/
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
0: \x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
0: \x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/\v+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
0: \x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
0: \x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/[\v\x{dc00}]+/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
0: \x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
0: \x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\V+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
0: \x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
0: \x09\x0e\x84\x86
|
||||
|
||||
/[\V\x{d800}]+/
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
0: \x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
0: \x09\x0e\x84\x86
|
||||
|
||||
/\R+/I,bsr=unicode
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
0: \x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
0: \x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First code unit = \x{d800}
|
||||
Last code unit = \x{dd00}
|
||||
Subject length lower bound = 6
|
||||
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x{80}]
|
||||
[^\x{ff}]
|
||||
[^\x{100}]
|
||||
[^\x{1000}]
|
||||
[^\x{ffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
/i [^\x{80}]
|
||||
/i [^\x{ff}]
|
||||
/i [^\x{100}]
|
||||
/i [^\x{1000}]
|
||||
/i [^\x{ffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x{100}]*
|
||||
[^\x{1000}]+
|
||||
[^\x{ffff}]??
|
||||
[^\x{8000}]{4}
|
||||
[^\x{8000}]*
|
||||
[^\x{7fff}]{2}
|
||||
[^\x{7fff}]{0,7}?
|
||||
[^\x{100}]{5}
|
||||
[^\x{100}]?+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
/i [^\x{100}]*
|
||||
/i [^\x{1000}]+
|
||||
/i [^\x{ffff}]??
|
||||
/i [^\x{8000}]{4}
|
||||
/i [^\x{8000}]*
|
||||
/i [^\x{7fff}]{2}
|
||||
/i [^\x{7fff}]{0,7}?
|
||||
/i [^\x{100}]{5}
|
||||
/i [^\x{100}]?+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||
XX
|
||||
0: XX
|
||||
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||
XX
|
||||
0: XX
|
||||
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
|
||||
|
||||
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
\x{100}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x{100}-\x{200}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
\x{d800}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^\x{ffff}+/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}?/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}*/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}{3}/i
|
||||
\x{ffff}\x{ffff}\x{ffff}
|
||||
0: \x{ffff}\x{ffff}\x{ffff}
|
||||
|
||||
/^\x{ffff}{0,3}/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[b-\xff] (neg){12,}
|
||||
[\x00-a] (neg)*+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
|
||||
\s*
|
||||
|
||||
[0-9A-Z_a-z]++
|
||||
\W+
|
||||
|
||||
[\x00-/:-\xff] (neg)*?
|
||||
\d
|
||||
0
|
||||
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
|
||||
\w*
|
||||
A
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a*
|
||||
[b-\xff\x{100}-\x{200}]?+
|
||||
a#
|
||||
a*+
|
||||
[b-\xff\x{100}-\x{200}]?
|
||||
b#
|
||||
[a-f]*+
|
||||
[g-\xff\x{100}-\x{200}]*+
|
||||
#
|
||||
[g-\xff\x{100}-\x{200}]*+
|
||||
[a-c]*+
|
||||
#
|
||||
[g-\xff\x{100}-\x{200}]*
|
||||
[a-h]*+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\x{1234}\x{4321}]{2,4}?/
|
||||
\x{1234}\x{1234}\x{1234}
|
||||
0: \x{1234}\x{1234}
|
||||
|
||||
# Check maximum non-UTF character size for the 16-bit library.
|
||||
|
||||
/\x{ffff}/
|
||||
A\x{ffff}B
|
||||
0: \x{ffff}
|
||||
|
||||
/\x{10000}/
|
||||
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\o{20000}/
|
||||
|
||||
# Check maximum character size for the 32-bit library. These will all give
|
||||
# errors in the 16-bit library.
|
||||
|
||||
/\x{110000}/
|
||||
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{7fffffff}/
|
||||
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{80000000}/
|
||||
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{ffffffff}/
|
||||
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{100000000}/
|
||||
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\o{17777777777}/
|
||||
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\o{20000000000}/
|
||||
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\o{37777777777}/
|
||||
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\o{40000000000}/
|
||||
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{7fffffff}\x{7fffffff}/I
|
||||
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{80000000}\x{80000000}/I
|
||||
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{ffffffff}\x{ffffffff}/I
|
||||
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
|
||||
# Non-UTF characters
|
||||
|
||||
/\C{2,3}/
|
||||
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||
** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
** Truncation will probably give the wrong result.
|
||||
** Character \x{400001} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
** Truncation will probably give the wrong result.
|
||||
** Character \x{400002} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
** Truncation will probably give the wrong result.
|
||||
** Character \x{400003} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
** Truncation will probably give the wrong result.
|
||||
0: \x00\x01\x02
|
||||
|
||||
/\x{400000}\x{800000}/IBi
|
||||
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||
|
||||
# Check character ranges
|
||||
|
||||
/[\H]/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
|
||||
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
|
||||
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
|
||||
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
|
||||
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
|
||||
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
|
||||
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
|
||||
\x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
|
||||
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
|
||||
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
|
||||
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
|
||||
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
|
||||
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
|
||||
\xfa \xfb \xfc \xfd \xfe \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/[\V]/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
|
||||
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
||||
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
|
||||
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
|
||||
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
|
||||
\x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
|
||||
\x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
|
||||
\xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
|
||||
\xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
|
||||
\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
|
||||
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
|
||||
\xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
|
||||
\xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
|
||||
\xfc \xfd \xfe \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
# End of testinput11
|
|
@ -0,0 +1,681 @@
|
|||
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
||||
# features that are not compatible with the 8-bit library, or which give
|
||||
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
||||
# different, so they have separate output files.
|
||||
|
||||
#forbid_utf
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
0: aXb
|
||||
a\nb
|
||||
0: a\x0ab
|
||||
|
||||
/[^\x{c4}]/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x{c4}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/\x{100}/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First code unit = \x{100}
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional leading comment
|
||||
(?: (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address
|
||||
| # or
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # one word, optionally followed by....
|
||||
(?:
|
||||
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||
\(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) | # comments, or...
|
||||
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
# quoted strings
|
||||
)*
|
||||
< (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # leading <
|
||||
(?: @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* , (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
)* # further okay, if led by comma
|
||||
: # closing colon
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* )? # optional route
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address spec
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* > # trailing >
|
||||
# name and address
|
||||
) (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional trailing comment
|
||||
/Ix
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
Options: extended
|
||||
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
|
||||
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
|
||||
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 3
|
||||
|
||||
/[\h]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
>\x09<
|
||||
0: \x09
|
||||
|
||||
/[\h]+/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
>\x09\x20\xa0<
|
||||
0: \x09 \xa0
|
||||
|
||||
/[\v]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x0a-\x0d\x85\x{2028}-\x{2029}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\h]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\h+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x09 \x20 \xa0 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
0: \x{200a}\xa0\x{2000}
|
||||
|
||||
/[\h\x{dc00}]+/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x09 \x20 \xa0 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
0: \x{200a}\xa0\x{2000}
|
||||
|
||||
/\H+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
0: \x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
0: \x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/[\H\x{d800}]+/
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
0: \x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
0: \x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/\v+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
0: \x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
0: \x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/[\v\x{dc00}]+/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
0: \x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
0: \x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\V+/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
0: \x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
0: \x09\x0e\x84\x86
|
||||
|
||||
/[\V\x{d800}]+/
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
0: \x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
0: \x09\x0e\x84\x86
|
||||
|
||||
/\R+/I,bsr=unicode
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
0: \x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
0: \x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First code unit = \x{d800}
|
||||
Last code unit = \x{dd00}
|
||||
Subject length lower bound = 6
|
||||
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x{80}]
|
||||
[^\x{ff}]
|
||||
[^\x{100}]
|
||||
[^\x{1000}]
|
||||
[^\x{ffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
/i [^\x{80}]
|
||||
/i [^\x{ff}]
|
||||
/i [^\x{100}]
|
||||
/i [^\x{1000}]
|
||||
/i [^\x{ffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[^\x{100}]*
|
||||
[^\x{1000}]+
|
||||
[^\x{ffff}]??
|
||||
[^\x{8000}]{4}
|
||||
[^\x{8000}]*
|
||||
[^\x{7fff}]{2}
|
||||
[^\x{7fff}]{0,7}?
|
||||
[^\x{100}]{5}
|
||||
[^\x{100}]?+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
/i [^\x{100}]*
|
||||
/i [^\x{1000}]+
|
||||
/i [^\x{ffff}]??
|
||||
/i [^\x{8000}]{4}
|
||||
/i [^\x{8000}]*
|
||||
/i [^\x{7fff}]{2}
|
||||
/i [^\x{7fff}]{0,7}?
|
||||
/i [^\x{100}]{5}
|
||||
/i [^\x{100}]?+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||
XX
|
||||
0: XX
|
||||
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||
XX
|
||||
0: XX
|
||||
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
|
||||
|
||||
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
\x{100}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x{100}-\x{200}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
\x{d800}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^\x{ffff}+/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}?/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}*/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}{3}/i
|
||||
\x{ffff}\x{ffff}\x{ffff}
|
||||
0: \x{ffff}\x{ffff}\x{ffff}
|
||||
|
||||
/^\x{ffff}{0,3}/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[b-\xff] (neg){12,}
|
||||
[\x00-a] (neg)*+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
|
||||
\s*
|
||||
|
||||
[0-9A-Z_a-z]++
|
||||
\W+
|
||||
|
||||
[\x00-/:-\xff] (neg)*?
|
||||
\d
|
||||
0
|
||||
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
|
||||
\w*
|
||||
A
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a*
|
||||
[b-\xff\x{100}-\x{200}]?+
|
||||
a#
|
||||
a*+
|
||||
[b-\xff\x{100}-\x{200}]?
|
||||
b#
|
||||
[a-f]*+
|
||||
[g-\xff\x{100}-\x{200}]*+
|
||||
#
|
||||
[g-\xff\x{100}-\x{200}]*+
|
||||
[a-c]*+
|
||||
#
|
||||
[g-\xff\x{100}-\x{200}]*
|
||||
[a-h]*+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\x{1234}\x{4321}]{2,4}?/
|
||||
\x{1234}\x{1234}\x{1234}
|
||||
0: \x{1234}\x{1234}
|
||||
|
||||
# Check maximum non-UTF character size for the 16-bit library.
|
||||
|
||||
/\x{ffff}/
|
||||
A\x{ffff}B
|
||||
0: \x{ffff}
|
||||
|
||||
/\x{10000}/
|
||||
|
||||
/\o{20000}/
|
||||
|
||||
# Check maximum character size for the 32-bit library. These will all give
|
||||
# errors in the 16-bit library.
|
||||
|
||||
/\x{110000}/
|
||||
|
||||
/\x{7fffffff}/
|
||||
|
||||
/\x{80000000}/
|
||||
|
||||
/\x{ffffffff}/
|
||||
|
||||
/\x{100000000}/
|
||||
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\o{17777777777}/
|
||||
|
||||
/\o{20000000000}/
|
||||
|
||||
/\o{37777777777}/
|
||||
|
||||
/\o{40000000000}/
|
||||
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\x{7fffffff}\x{7fffffff}/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First code unit = \x{7fffffff}
|
||||
Last code unit = \x{7fffffff}
|
||||
Subject length lower bound = 2
|
||||
|
||||
/\x{80000000}\x{80000000}/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First code unit = \x{80000000}
|
||||
Last code unit = \x{80000000}
|
||||
Subject length lower bound = 2
|
||||
|
||||
/\x{ffffffff}\x{ffffffff}/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First code unit = \x{ffffffff}
|
||||
Last code unit = \x{ffffffff}
|
||||
Subject length lower bound = 2
|
||||
|
||||
# Non-UTF characters
|
||||
|
||||
/\C{2,3}/
|
||||
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||
0: \x{400000}\x{400001}\x{400002}
|
||||
|
||||
/\x{400000}\x{800000}/IBi
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
/i \x{400000}\x{800000}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: caseless
|
||||
First code unit = \x{400000}
|
||||
Last code unit = \x{800000}
|
||||
Subject length lower bound = 2
|
||||
|
||||
# Check character ranges
|
||||
|
||||
/[\H]/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
|
||||
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
|
||||
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
|
||||
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
|
||||
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
|
||||
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
|
||||
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
|
||||
\x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
|
||||
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
|
||||
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
|
||||
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
|
||||
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
|
||||
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
|
||||
\xfa \xfb \xfc \xfd \xfe \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/[\V]/IB
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
|
||||
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
||||
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
|
||||
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
|
||||
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
|
||||
\x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
|
||||
\x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
|
||||
\xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
|
||||
\xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
|
||||
\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
|
||||
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
|
||||
\xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
|
||||
\xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
|
||||
\xfc \xfd \xfe \xff
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
# End of testinput11
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,27 @@
|
|||
# These DFA tests are for the handling of characters greater than 255 in
|
||||
# 16-bit or 32-bit, non-UTF mode.
|
||||
|
||||
#forbid_utf
|
||||
#subject dfa
|
||||
|
||||
/^\x{ffff}+/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}?/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}*/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
/^\x{ffff}{3}/i
|
||||
\x{ffff}\x{ffff}\x{ffff}
|
||||
0: \x{ffff}\x{ffff}\x{ffff}
|
||||
|
||||
/^\x{ffff}{0,3}/i
|
||||
\x{ffff}
|
||||
0: \x{ffff}
|
||||
|
||||
# End of testinput13
|
|
@ -6723,7 +6723,7 @@ Subject length lower bound = 5
|
|||
1: \x0d
|
||||
2: \x0a
|
||||
|
||||
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
|
||||
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
||||
Capturing subpattern count = 1
|
||||
May match empty string
|
||||
No options
|
||||
|
|
|
@ -1281,13 +1281,13 @@ Partial match: abcde
|
|||
X\=ps
|
||||
Partial match: X
|
||||
|
||||
/\sxxx\s/utf,tables=1
|
||||
/\sxxx\s/utf,tables=2
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
No match
|
||||
0: \x{85}xxx\x{a0}
|
||||
AB\x{a0}xxx\x{85}XYZ
|
||||
No match
|
||||
0: \x{a0}xxx\x{85}
|
||||
|
||||
/\S \S/utf,tables=1
|
||||
/\S \S/utf,tables=2
|
||||
\x{a2} \x{84}
|
||||
0: \x{a2} \x{84}
|
||||
|
||||
|
|
|
@ -0,0 +1,745 @@
|
|||
# These are a few representative patterns whose lengths and offsets are to be
|
||||
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||
# are all themselves checked in other tests. Unicode, including property
|
||||
# support, is required for these tests.
|
||||
|
||||
#pattern fullbincode,memory
|
||||
|
||||
/((?i)b)/
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 5 CBra 1
|
||||
5 /i b
|
||||
7 5 Ket
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s)(.*X|^B)/
|
||||
Memory allocation (code space): 38
|
||||
------------------------------------------------------------------
|
||||
0 16 Bra
|
||||
2 7 CBra 1
|
||||
5 AllAny*
|
||||
7 X
|
||||
9 5 Alt
|
||||
11 ^
|
||||
12 B
|
||||
14 12 Ket
|
||||
16 16 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s:.*X|^B)/
|
||||
Memory allocation (code space): 36
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra
|
||||
2 6 Bra
|
||||
4 AllAny*
|
||||
6 X
|
||||
8 5 Alt
|
||||
10 ^
|
||||
11 B
|
||||
13 11 Ket
|
||||
15 15 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[[:alnum:]]/
|
||||
Memory allocation (code space): 46
|
||||
------------------------------------------------------------------
|
||||
0 20 Bra
|
||||
2 ^
|
||||
3 [0-9A-Za-z]
|
||||
20 20 Ket
|
||||
22 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/#/Ix
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 2 Bra
|
||||
2 2 Ket
|
||||
4 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
May match empty string
|
||||
Options: extended
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 0
|
||||
|
||||
/a#/Ix
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 a
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: extended
|
||||
First code unit = 'a'
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/x?+/
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 x?+
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x++/
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 x++
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x{1,3}+/
|
||||
Memory allocation (code space): 20
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra
|
||||
2 x
|
||||
4 x{0,2}+
|
||||
7 7 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(x)*+/
|
||||
Memory allocation (code space): 26
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 Braposzero
|
||||
3 5 CBraPos 1
|
||||
6 x
|
||||
8 5 KetRpos
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||
Memory allocation (code space): 142
|
||||
------------------------------------------------------------------
|
||||
0 68 Bra
|
||||
2 ^
|
||||
3 63 CBra 1
|
||||
6 5 CBra 2
|
||||
9 a+
|
||||
11 5 Ket
|
||||
13 21 CBra 3
|
||||
16 [ab]+?
|
||||
34 21 Ket
|
||||
36 21 CBra 4
|
||||
39 [bc]+
|
||||
57 21 Ket
|
||||
59 5 CBra 5
|
||||
62 \w*+
|
||||
64 5 Ket
|
||||
66 63 Ket
|
||||
68 68 Ket
|
||||
70 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
Memory allocation (code space): 1648
|
||||
------------------------------------------------------------------
|
||||
0 821 Bra
|
||||
2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
820 \b
|
||||
821 821 Ket
|
||||
823 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
Memory allocation (code space): 1628
|
||||
------------------------------------------------------------------
|
||||
0 811 Bra
|
||||
2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
810 \b
|
||||
811 811 Ket
|
||||
813 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)b)/
|
||||
Memory allocation (code space): 32
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
2 9 CBra 1
|
||||
5 a
|
||||
7 2 Recurse
|
||||
9 b
|
||||
11 9 Ket
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)+b)/
|
||||
Memory allocation (code space): 40
|
||||
------------------------------------------------------------------
|
||||
0 17 Bra
|
||||
2 13 CBra 1
|
||||
5 a
|
||||
7 4 Once
|
||||
9 2 Recurse
|
||||
11 4 KetRmax
|
||||
13 b
|
||||
15 13 Ket
|
||||
17 17 Ket
|
||||
19 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||
Memory allocation (code space): 54
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra
|
||||
2 a
|
||||
4 5 CBra 1
|
||||
7 b
|
||||
9 4 Alt
|
||||
11 c
|
||||
13 9 Ket
|
||||
15 d
|
||||
17 5 CBra 2
|
||||
20 e
|
||||
22 5 Ket
|
||||
24 24 Ket
|
||||
26 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||
Memory allocation (code space): 64
|
||||
------------------------------------------------------------------
|
||||
0 29 Bra
|
||||
2 18 Bra
|
||||
4 a
|
||||
6 12 CBra 1
|
||||
9 c
|
||||
11 5 CBra 2
|
||||
14 d
|
||||
16 5 Ket
|
||||
18 12 Ket
|
||||
20 18 Ket
|
||||
22 5 CBra 3
|
||||
25 a
|
||||
27 5 Ket
|
||||
29 29 Ket
|
||||
31 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||
Memory allocation (code space): 54
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra
|
||||
2 5 CBra 1
|
||||
5 a
|
||||
7 5 Ket
|
||||
9 Any
|
||||
10 Any
|
||||
11 Any
|
||||
12 \1
|
||||
14 bbb
|
||||
20 2 Recurse
|
||||
22 d
|
||||
24 24 Ket
|
||||
26 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abc(?C255)de(?C)f/
|
||||
Memory allocation (code space): 50
|
||||
------------------------------------------------------------------
|
||||
0 22 Bra
|
||||
2 abc
|
||||
8 Callout 255 10 1
|
||||
12 de
|
||||
16 Callout 0 16 1
|
||||
20 f
|
||||
22 22 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abcde/auto_callout
|
||||
Memory allocation (code space): 78
|
||||
------------------------------------------------------------------
|
||||
0 36 Bra
|
||||
2 Callout 255 0 1
|
||||
6 a
|
||||
8 Callout 255 1 1
|
||||
12 b
|
||||
14 Callout 255 2 1
|
||||
18 c
|
||||
20 Callout 255 3 1
|
||||
24 d
|
||||
26 Callout 255 4 1
|
||||
30 e
|
||||
32 Callout 255 5 0
|
||||
36 36 Ket
|
||||
38 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100}/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{100}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{1000}/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{1000}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{10000}/utf
|
||||
Memory allocation (code space): 16
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
2 \x{10000}
|
||||
5 5 Ket
|
||||
7 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100000}/utf
|
||||
Memory allocation (code space): 16
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
2 \x{100000}
|
||||
5 5 Ket
|
||||
7 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{10ffff}/utf
|
||||
Memory allocation (code space): 16
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
2 \x{10ffff}
|
||||
5 5 Ket
|
||||
7 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{110000}/utf
|
||||
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/[\x{ff}]/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{ff}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{100}]/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{100}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x80/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{80}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\xff/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{ff}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||
Memory allocation (code space): 26
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 A\x{2262}\x{391}.
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = 'A'
|
||||
Last code unit = '.'
|
||||
Subject length lower bound = 4
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||
Memory allocation (code space): 22
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra
|
||||
2 \x{d55c}\x{ad6d}\x{c5b4}
|
||||
8 8 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = \x{d55c}
|
||||
Last code unit = \x{c5b4}
|
||||
Subject length lower bound = 3
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||
Memory allocation (code space): 22
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra
|
||||
2 \x{65e5}\x{672c}\x{8a9e}
|
||||
8 8 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = \x{65e5}
|
||||
Last code unit = \x{8a9e}
|
||||
Subject length lower bound = 3
|
||||
|
||||
/[\x{100}]/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{100}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[Z\x{100}]/utf
|
||||
Memory allocation (code space): 54
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra
|
||||
2 [Z\x{100}]
|
||||
24 24 Ket
|
||||
26 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||
Memory allocation (code space): 26
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 ^
|
||||
3 [\x{100}-\x{150}]
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E]/utf
|
||||
Memory allocation (code space): 26
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 ^
|
||||
3 [\x{100}-\x{150}]
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E/utf
|
||||
Failed: error 106 at offset 13: missing terminating ] for character class
|
||||
|
||||
/[\p{L}]/
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\p{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{^L}]/
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\P{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{L}]/
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\P{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{^L}]/
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\p{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[abc\p{L}\x{0660}]/utf
|
||||
Memory allocation (code space): 60
|
||||
------------------------------------------------------------------
|
||||
0 27 Bra
|
||||
2 [a-c\p{L}\x{660}]
|
||||
27 27 Ket
|
||||
29 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}]/utf
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\p{Nd}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}+-]+/utf
|
||||
Memory allocation (code space): 58
|
||||
------------------------------------------------------------------
|
||||
0 26 Bra
|
||||
2 [+\-\p{Nd}]++
|
||||
26 26 Ket
|
||||
28 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||
Memory allocation (code space): 32
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||
Memory allocation (code space): 32
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
2 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{105}-\x{109}]/i,utf
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\x{104}-\x{109}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( ( (?(1)0|) )* )/x
|
||||
Memory allocation (code space): 52
|
||||
------------------------------------------------------------------
|
||||
0 23 Bra
|
||||
2 19 CBra 1
|
||||
5 Brazero
|
||||
6 13 SCBra 2
|
||||
9 6 Cond
|
||||
11 1 Cond ref
|
||||
13 0
|
||||
15 2 Alt
|
||||
17 8 Ket
|
||||
19 13 KetRmax
|
||||
21 19 Ket
|
||||
23 23 Ket
|
||||
25 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( (?(1)0|)* )/x
|
||||
Memory allocation (code space): 42
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
2 14 CBra 1
|
||||
5 Brazero
|
||||
6 6 SCond
|
||||
8 1 Cond ref
|
||||
10 0
|
||||
12 2 Alt
|
||||
14 8 KetRmax
|
||||
16 14 Ket
|
||||
18 18 Ket
|
||||
20 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 a
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 a
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{aa}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{aa}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^a]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^a]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^\x{aa}]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/utf
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^\x{aa}]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
#pattern -memory
|
||||
|
||||
/[^\d]/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [^\p{Nd}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 26 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
26 26 Ket
|
||||
28 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 26 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
26 26 Ket
|
||||
28 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 [\p{L}]++
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^alpha:]\S]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
2 [\P{L}\P{Xsp}]++
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||
------------------------------------------------------------------
|
||||
0 60 Bra
|
||||
2 abc
|
||||
8 5 CBra 1
|
||||
11 d
|
||||
13 4 Alt
|
||||
15 e
|
||||
17 9 Ket
|
||||
19 *THEN
|
||||
20 x
|
||||
22 12 CBra 2
|
||||
25 123
|
||||
31 *THEN
|
||||
32 4
|
||||
34 24 Alt
|
||||
36 567
|
||||
42 5 CBra 3
|
||||
45 b
|
||||
47 4 Alt
|
||||
49 q
|
||||
51 9 Ket
|
||||
53 *THEN
|
||||
54 xx
|
||||
58 36 Ket
|
||||
60 60 Ket
|
||||
62 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(((a\2)|(a*)\g<-1>))*a?/
|
||||
------------------------------------------------------------------
|
||||
0 39 Bra
|
||||
2 Brazero
|
||||
3 32 SCBra 1
|
||||
6 27 Once
|
||||
8 12 CBra 2
|
||||
11 7 CBra 3
|
||||
14 a
|
||||
16 \2
|
||||
18 7 Ket
|
||||
20 11 Alt
|
||||
22 5 CBra 4
|
||||
25 a*
|
||||
27 5 Ket
|
||||
29 22 Recurse
|
||||
31 23 Ket
|
||||
33 27 Ket
|
||||
35 32 KetRmax
|
||||
37 a?+
|
||||
39 39 Ket
|
||||
41 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
|
@ -0,0 +1,745 @@
|
|||
# These are a few representative patterns whose lengths and offsets are to be
|
||||
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||
# are all themselves checked in other tests. Unicode, including property
|
||||
# support, is required for these tests.
|
||||
|
||||
#pattern fullbincode,memory
|
||||
|
||||
/((?i)b)/
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 5 CBra 1
|
||||
5 /i b
|
||||
7 5 Ket
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s)(.*X|^B)/
|
||||
Memory allocation (code space): 76
|
||||
------------------------------------------------------------------
|
||||
0 16 Bra
|
||||
2 7 CBra 1
|
||||
5 AllAny*
|
||||
7 X
|
||||
9 5 Alt
|
||||
11 ^
|
||||
12 B
|
||||
14 12 Ket
|
||||
16 16 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s:.*X|^B)/
|
||||
Memory allocation (code space): 72
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra
|
||||
2 6 Bra
|
||||
4 AllAny*
|
||||
6 X
|
||||
8 5 Alt
|
||||
10 ^
|
||||
11 B
|
||||
13 11 Ket
|
||||
15 15 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[[:alnum:]]/
|
||||
Memory allocation (code space): 60
|
||||
------------------------------------------------------------------
|
||||
0 12 Bra
|
||||
2 ^
|
||||
3 [0-9A-Za-z]
|
||||
12 12 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/#/Ix
|
||||
Memory allocation (code space): 20
|
||||
------------------------------------------------------------------
|
||||
0 2 Bra
|
||||
2 2 Ket
|
||||
4 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
May match empty string
|
||||
Options: extended
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 0
|
||||
|
||||
/a#/Ix
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 a
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: extended
|
||||
First code unit = 'a'
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/x?+/
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 x?+
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x++/
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 x++
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x{1,3}+/
|
||||
Memory allocation (code space): 40
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra
|
||||
2 x
|
||||
4 x{0,2}+
|
||||
7 7 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(x)*+/
|
||||
Memory allocation (code space): 52
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 Braposzero
|
||||
3 5 CBraPos 1
|
||||
6 x
|
||||
8 5 KetRpos
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||
Memory allocation (code space): 220
|
||||
------------------------------------------------------------------
|
||||
0 52 Bra
|
||||
2 ^
|
||||
3 47 CBra 1
|
||||
6 5 CBra 2
|
||||
9 a+
|
||||
11 5 Ket
|
||||
13 13 CBra 3
|
||||
16 [ab]+?
|
||||
26 13 Ket
|
||||
28 13 CBra 4
|
||||
31 [bc]+
|
||||
41 13 Ket
|
||||
43 5 CBra 5
|
||||
46 \w*+
|
||||
48 5 Ket
|
||||
50 47 Ket
|
||||
52 52 Ket
|
||||
54 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
Memory allocation (code space): 3296
|
||||
------------------------------------------------------------------
|
||||
0 821 Bra
|
||||
2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
820 \b
|
||||
821 821 Ket
|
||||
823 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
Memory allocation (code space): 3256
|
||||
------------------------------------------------------------------
|
||||
0 811 Bra
|
||||
2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
810 \b
|
||||
811 811 Ket
|
||||
813 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)b)/
|
||||
Memory allocation (code space): 64
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
2 9 CBra 1
|
||||
5 a
|
||||
7 2 Recurse
|
||||
9 b
|
||||
11 9 Ket
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)+b)/
|
||||
Memory allocation (code space): 80
|
||||
------------------------------------------------------------------
|
||||
0 17 Bra
|
||||
2 13 CBra 1
|
||||
5 a
|
||||
7 4 Once
|
||||
9 2 Recurse
|
||||
11 4 KetRmax
|
||||
13 b
|
||||
15 13 Ket
|
||||
17 17 Ket
|
||||
19 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||
Memory allocation (code space): 108
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra
|
||||
2 a
|
||||
4 5 CBra 1
|
||||
7 b
|
||||
9 4 Alt
|
||||
11 c
|
||||
13 9 Ket
|
||||
15 d
|
||||
17 5 CBra 2
|
||||
20 e
|
||||
22 5 Ket
|
||||
24 24 Ket
|
||||
26 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||
Memory allocation (code space): 128
|
||||
------------------------------------------------------------------
|
||||
0 29 Bra
|
||||
2 18 Bra
|
||||
4 a
|
||||
6 12 CBra 1
|
||||
9 c
|
||||
11 5 CBra 2
|
||||
14 d
|
||||
16 5 Ket
|
||||
18 12 Ket
|
||||
20 18 Ket
|
||||
22 5 CBra 3
|
||||
25 a
|
||||
27 5 Ket
|
||||
29 29 Ket
|
||||
31 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||
Memory allocation (code space): 108
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra
|
||||
2 5 CBra 1
|
||||
5 a
|
||||
7 5 Ket
|
||||
9 Any
|
||||
10 Any
|
||||
11 Any
|
||||
12 \1
|
||||
14 bbb
|
||||
20 2 Recurse
|
||||
22 d
|
||||
24 24 Ket
|
||||
26 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abc(?C255)de(?C)f/
|
||||
Memory allocation (code space): 100
|
||||
------------------------------------------------------------------
|
||||
0 22 Bra
|
||||
2 abc
|
||||
8 Callout 255 10 1
|
||||
12 de
|
||||
16 Callout 0 16 1
|
||||
20 f
|
||||
22 22 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abcde/auto_callout
|
||||
Memory allocation (code space): 156
|
||||
------------------------------------------------------------------
|
||||
0 36 Bra
|
||||
2 Callout 255 0 1
|
||||
6 a
|
||||
8 Callout 255 1 1
|
||||
12 b
|
||||
14 Callout 255 2 1
|
||||
18 c
|
||||
20 Callout 255 3 1
|
||||
24 d
|
||||
26 Callout 255 4 1
|
||||
30 e
|
||||
32 Callout 255 5 0
|
||||
36 36 Ket
|
||||
38 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100}/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{100}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{1000}/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{1000}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{10000}/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{10000}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100000}/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{100000}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{10ffff}/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{10ffff}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{110000}/utf
|
||||
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/[\x{ff}]/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{ff}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{100}]/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{100}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x80/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{80}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\xff/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{ff}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||
Memory allocation (code space): 52
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 A\x{2262}\x{391}.
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = 'A'
|
||||
Last code unit = '.'
|
||||
Subject length lower bound = 4
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||
Memory allocation (code space): 44
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra
|
||||
2 \x{d55c}\x{ad6d}\x{c5b4}
|
||||
8 8 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = \x{d55c}
|
||||
Last code unit = \x{c5b4}
|
||||
Subject length lower bound = 3
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||
Memory allocation (code space): 44
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra
|
||||
2 \x{65e5}\x{672c}\x{8a9e}
|
||||
8 8 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = \x{65e5}
|
||||
Last code unit = \x{8a9e}
|
||||
Subject length lower bound = 3
|
||||
|
||||
/[\x{100}]/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{100}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[Z\x{100}]/utf
|
||||
Memory allocation (code space): 76
|
||||
------------------------------------------------------------------
|
||||
0 16 Bra
|
||||
2 [Z\x{100}]
|
||||
16 16 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||
Memory allocation (code space): 52
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 ^
|
||||
3 [\x{100}-\x{150}]
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E]/utf
|
||||
Memory allocation (code space): 52
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 ^
|
||||
3 [\x{100}-\x{150}]
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E/utf
|
||||
Failed: error 106 at offset 13: missing terminating ] for character class
|
||||
|
||||
/[\p{L}]/
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\p{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{^L}]/
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\P{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{L}]/
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\P{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{^L}]/
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\p{L}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[abc\p{L}\x{0660}]/utf
|
||||
Memory allocation (code space): 88
|
||||
------------------------------------------------------------------
|
||||
0 19 Bra
|
||||
2 [a-c\p{L}\x{660}]
|
||||
19 19 Ket
|
||||
21 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}]/utf
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\p{Nd}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}+-]+/utf
|
||||
Memory allocation (code space): 84
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
2 [+\-\p{Nd}]++
|
||||
18 18 Ket
|
||||
20 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||
Memory allocation (code space): 60
|
||||
------------------------------------------------------------------
|
||||
0 12 Bra
|
||||
2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
12 12 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||
Memory allocation (code space): 60
|
||||
------------------------------------------------------------------
|
||||
0 12 Bra
|
||||
2 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
12 12 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{105}-\x{109}]/i,utf
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [\x{104}-\x{109}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( ( (?(1)0|) )* )/x
|
||||
Memory allocation (code space): 104
|
||||
------------------------------------------------------------------
|
||||
0 23 Bra
|
||||
2 19 CBra 1
|
||||
5 Brazero
|
||||
6 13 SCBra 2
|
||||
9 6 Cond
|
||||
11 1 Cond ref
|
||||
13 0
|
||||
15 2 Alt
|
||||
17 8 Ket
|
||||
19 13 KetRmax
|
||||
21 19 Ket
|
||||
23 23 Ket
|
||||
25 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( (?(1)0|)* )/x
|
||||
Memory allocation (code space): 84
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
2 14 CBra 1
|
||||
5 Brazero
|
||||
6 6 SCond
|
||||
8 1 Cond ref
|
||||
10 0
|
||||
12 2 Alt
|
||||
14 8 KetRmax
|
||||
16 14 Ket
|
||||
18 18 Ket
|
||||
20 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 a
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 a
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{aa}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 \x{aa}
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^a]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^a]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^\x{aa}]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/utf
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 4 Bra
|
||||
2 [^\x{aa}]
|
||||
4 4 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
#pattern -memory
|
||||
|
||||
/[^\d]/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
2 [^\p{Nd}]
|
||||
9 9 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
18 18 Ket
|
||||
20 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
18 18 Ket
|
||||
20 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra
|
||||
2 [\p{L}]++
|
||||
10 10 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^alpha:]\S]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
2 [\P{L}\P{Xsp}]++
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||
------------------------------------------------------------------
|
||||
0 60 Bra
|
||||
2 abc
|
||||
8 5 CBra 1
|
||||
11 d
|
||||
13 4 Alt
|
||||
15 e
|
||||
17 9 Ket
|
||||
19 *THEN
|
||||
20 x
|
||||
22 12 CBra 2
|
||||
25 123
|
||||
31 *THEN
|
||||
32 4
|
||||
34 24 Alt
|
||||
36 567
|
||||
42 5 CBra 3
|
||||
45 b
|
||||
47 4 Alt
|
||||
49 q
|
||||
51 9 Ket
|
||||
53 *THEN
|
||||
54 xx
|
||||
58 36 Ket
|
||||
60 60 Ket
|
||||
62 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(((a\2)|(a*)\g<-1>))*a?/
|
||||
------------------------------------------------------------------
|
||||
0 39 Bra
|
||||
2 Brazero
|
||||
3 32 SCBra 1
|
||||
6 27 Once
|
||||
8 12 CBra 2
|
||||
11 7 CBra 3
|
||||
14 a
|
||||
16 \2
|
||||
18 7 Ket
|
||||
20 11 Alt
|
||||
22 5 CBra 4
|
||||
25 a*
|
||||
27 5 Ket
|
||||
29 22 Recurse
|
||||
31 23 Ket
|
||||
33 27 Ket
|
||||
35 32 KetRmax
|
||||
37 a?+
|
||||
39 39 Ket
|
||||
41 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
|
@ -0,0 +1,745 @@
|
|||
# These are a few representative patterns whose lengths and offsets are to be
|
||||
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||
# are all themselves checked in other tests. Unicode, including property
|
||||
# support, is required for these tests.
|
||||
|
||||
#pattern fullbincode,memory
|
||||
|
||||
/((?i)b)/
|
||||
Memory allocation (code space): 17
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
3 7 CBra 1
|
||||
8 /i b
|
||||
10 7 Ket
|
||||
13 13 Ket
|
||||
16 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s)(.*X|^B)/
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra
|
||||
3 9 CBra 1
|
||||
8 AllAny*
|
||||
10 X
|
||||
12 6 Alt
|
||||
15 ^
|
||||
16 B
|
||||
18 15 Ket
|
||||
21 21 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s:.*X|^B)/
|
||||
Memory allocation (code space): 23
|
||||
------------------------------------------------------------------
|
||||
0 19 Bra
|
||||
3 7 Bra
|
||||
6 AllAny*
|
||||
8 X
|
||||
10 6 Alt
|
||||
13 ^
|
||||
14 B
|
||||
16 13 Ket
|
||||
19 19 Ket
|
||||
22 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[[:alnum:]]/
|
||||
Memory allocation (code space): 41
|
||||
------------------------------------------------------------------
|
||||
0 37 Bra
|
||||
3 ^
|
||||
4 [0-9A-Za-z]
|
||||
37 37 Ket
|
||||
40 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/#/Ix
|
||||
Memory allocation (code space): 7
|
||||
------------------------------------------------------------------
|
||||
0 3 Bra
|
||||
3 3 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
May match empty string
|
||||
Options: extended
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 0
|
||||
|
||||
/a#/Ix
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 a
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: extended
|
||||
First code unit = 'a'
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/x?+/
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 x?+
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x++/
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 x++
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x{1,3}+/
|
||||
Memory allocation (code space): 13
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra
|
||||
3 x
|
||||
5 x{0,2}+
|
||||
9 9 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(x)*+/
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra
|
||||
3 Braposzero
|
||||
4 7 CBraPos 1
|
||||
9 x
|
||||
11 7 KetRpos
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||
Memory allocation (code space): 120
|
||||
------------------------------------------------------------------
|
||||
0 116 Bra
|
||||
3 ^
|
||||
4 109 CBra 1
|
||||
9 7 CBra 2
|
||||
14 a+
|
||||
16 7 Ket
|
||||
19 39 CBra 3
|
||||
24 [ab]+?
|
||||
58 39 Ket
|
||||
61 39 CBra 4
|
||||
66 [bc]+
|
||||
100 39 Ket
|
||||
103 7 CBra 5
|
||||
108 \w*+
|
||||
110 7 Ket
|
||||
113 109 Ket
|
||||
116 116 Ket
|
||||
119 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
Memory allocation (code space): 826
|
||||
------------------------------------------------------------------
|
||||
0 822 Bra
|
||||
3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
821 \b
|
||||
822 822 Ket
|
||||
825 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||
Memory allocation (code space): 816
|
||||
------------------------------------------------------------------
|
||||
0 812 Bra
|
||||
3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
811 \b
|
||||
812 812 Ket
|
||||
815 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)b)/
|
||||
Memory allocation (code space): 22
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
3 12 CBra 1
|
||||
8 a
|
||||
10 3 Recurse
|
||||
13 b
|
||||
15 12 Ket
|
||||
18 18 Ket
|
||||
21 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)+b)/
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra
|
||||
3 18 CBra 1
|
||||
8 a
|
||||
10 6 Once
|
||||
13 3 Recurse
|
||||
16 6 KetRmax
|
||||
19 b
|
||||
21 18 Ket
|
||||
24 24 Ket
|
||||
27 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||
Memory allocation (code space): 36
|
||||
------------------------------------------------------------------
|
||||
0 32 Bra
|
||||
3 a
|
||||
5 7 CBra 1
|
||||
10 b
|
||||
12 5 Alt
|
||||
15 c
|
||||
17 12 Ket
|
||||
20 d
|
||||
22 7 CBra 2
|
||||
27 e
|
||||
29 7 Ket
|
||||
32 32 Ket
|
||||
35 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||
Memory allocation (code space): 45
|
||||
------------------------------------------------------------------
|
||||
0 41 Bra
|
||||
3 25 Bra
|
||||
6 a
|
||||
8 17 CBra 1
|
||||
13 c
|
||||
15 7 CBra 2
|
||||
20 d
|
||||
22 7 Ket
|
||||
25 17 Ket
|
||||
28 25 Ket
|
||||
31 7 CBra 3
|
||||
36 a
|
||||
38 7 Ket
|
||||
41 41 Ket
|
||||
44 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||
Memory allocation (code space): 34
|
||||
------------------------------------------------------------------
|
||||
0 30 Bra
|
||||
3 7 CBra 1
|
||||
8 a
|
||||
10 7 Ket
|
||||
13 Any
|
||||
14 Any
|
||||
15 Any
|
||||
16 \1
|
||||
19 bbb
|
||||
25 3 Recurse
|
||||
28 d
|
||||
30 30 Ket
|
||||
33 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abc(?C255)de(?C)f/
|
||||
Memory allocation (code space): 31
|
||||
------------------------------------------------------------------
|
||||
0 27 Bra
|
||||
3 abc
|
||||
9 Callout 255 10 1
|
||||
15 de
|
||||
19 Callout 0 16 1
|
||||
25 f
|
||||
27 27 Ket
|
||||
30 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abcde/auto_callout
|
||||
Memory allocation (code space): 53
|
||||
------------------------------------------------------------------
|
||||
0 49 Bra
|
||||
3 Callout 255 0 1
|
||||
9 a
|
||||
11 Callout 255 1 1
|
||||
17 b
|
||||
19 Callout 255 2 1
|
||||
25 c
|
||||
27 Callout 255 3 1
|
||||
33 d
|
||||
35 Callout 255 4 1
|
||||
41 e
|
||||
43 Callout 255 5 0
|
||||
49 49 Ket
|
||||
52 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100}/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{100}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{1000}/utf
|
||||
Memory allocation (code space): 11
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra
|
||||
3 \x{1000}
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{10000}/utf
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra
|
||||
3 \x{10000}
|
||||
8 8 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100000}/utf
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra
|
||||
3 \x{100000}
|
||||
8 8 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{10ffff}/utf
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra
|
||||
3 \x{10ffff}
|
||||
8 8 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{110000}/utf
|
||||
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/[\x{ff}]/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{ff}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{100}]/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{100}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x80/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{80}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\xff/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{ff}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra
|
||||
3 A\x{2262}\x{391}.
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = 'A'
|
||||
Last code unit = '.'
|
||||
Subject length lower bound = 4
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra
|
||||
3 \x{d55c}\x{ad6d}\x{c5b4}
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = \xed
|
||||
Last code unit = \xb4
|
||||
Subject length lower bound = 3
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra
|
||||
3 \x{65e5}\x{672c}\x{8a9e}
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf
|
||||
First code unit = \xe6
|
||||
Last code unit = \x9e
|
||||
Subject length lower bound = 3
|
||||
|
||||
/[\x{100}]/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{100}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[Z\x{100}]/utf
|
||||
Memory allocation (code space): 47
|
||||
------------------------------------------------------------------
|
||||
0 43 Bra
|
||||
3 [Z\x{100}]
|
||||
43 43 Ket
|
||||
46 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra
|
||||
3 ^
|
||||
4 [\x{100}-\x{150}]
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E]/utf
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra
|
||||
3 ^
|
||||
4 [\x{100}-\x{150}]
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E/utf
|
||||
Failed: error 106 at offset 15: missing terminating ] for character class
|
||||
|
||||
/[\p{L}]/
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{^L}]/
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{L}]/
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{^L}]/
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[abc\p{L}\x{0660}]/utf
|
||||
Memory allocation (code space): 50
|
||||
------------------------------------------------------------------
|
||||
0 46 Bra
|
||||
3 [a-c\p{L}\x{660}]
|
||||
46 46 Ket
|
||||
49 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}]/utf
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra
|
||||
3 [\p{Nd}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}+-]+/utf
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra
|
||||
3 [+\-\p{Nd}]++
|
||||
44 44 Ket
|
||||
47 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra
|
||||
3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra
|
||||
3 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{105}-\x{109}]/i,utf
|
||||
Memory allocation (code space): 17
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra
|
||||
3 [\x{104}-\x{109}]
|
||||
13 13 Ket
|
||||
16 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( ( (?(1)0|) )* )/x
|
||||
Memory allocation (code space): 38
|
||||
------------------------------------------------------------------
|
||||
0 34 Bra
|
||||
3 28 CBra 1
|
||||
8 Brazero
|
||||
9 19 SCBra 2
|
||||
14 8 Cond
|
||||
17 1 Cond ref
|
||||
20 0
|
||||
22 3 Alt
|
||||
25 11 Ket
|
||||
28 19 KetRmax
|
||||
31 28 Ket
|
||||
34 34 Ket
|
||||
37 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( (?(1)0|)* )/x
|
||||
Memory allocation (code space): 30
|
||||
------------------------------------------------------------------
|
||||
0 26 Bra
|
||||
3 20 CBra 1
|
||||
8 Brazero
|
||||
9 8 SCond
|
||||
12 1 Cond ref
|
||||
15 0
|
||||
17 3 Alt
|
||||
20 11 KetRmax
|
||||
23 20 Ket
|
||||
26 26 Ket
|
||||
29 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 a
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/utf
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 a
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 \x{aa}
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{aa}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 [^a]
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/utf
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 [^a]
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 [^\x{aa}]
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/utf
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 [^\x{aa}]
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
#pattern -memory
|
||||
|
||||
/[^\d]/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra
|
||||
3 [^\p{Nd}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra
|
||||
3 [ -~\x80-\xff\P{L}]++
|
||||
44 44 Ket
|
||||
47 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra
|
||||
3 [ -~\x80-\xff\P{L}]++
|
||||
44 44 Ket
|
||||
47 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 12 Bra
|
||||
3 [\p{L}]++
|
||||
12 12 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^alpha:]\S]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra
|
||||
3 [\P{L}\P{Xsp}]++
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||
------------------------------------------------------------------
|
||||
0 73 Bra
|
||||
3 abc
|
||||
9 7 CBra 1
|
||||
14 d
|
||||
16 5 Alt
|
||||
19 e
|
||||
21 12 Ket
|
||||
24 *THEN
|
||||
25 x
|
||||
27 14 CBra 2
|
||||
32 123
|
||||
38 *THEN
|
||||
39 4
|
||||
41 29 Alt
|
||||
44 567
|
||||
50 7 CBra 3
|
||||
55 b
|
||||
57 5 Alt
|
||||
60 q
|
||||
62 12 Ket
|
||||
65 *THEN
|
||||
66 xx
|
||||
70 43 Ket
|
||||
73 73 Ket
|
||||
76 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(((a\2)|(a*)\g<-1>))*a?/
|
||||
------------------------------------------------------------------
|
||||
0 57 Bra
|
||||
3 Brazero
|
||||
4 48 SCBra 1
|
||||
9 40 Once
|
||||
12 18 CBra 2
|
||||
17 10 CBra 3
|
||||
22 a
|
||||
24 \2
|
||||
27 10 Ket
|
||||
30 16 Alt
|
||||
33 7 CBra 4
|
||||
38 a*
|
||||
40 7 Ket
|
||||
43 33 Recurse
|
||||
46 34 Ket
|
||||
49 40 Ket
|
||||
52 48 KetRmax
|
||||
55 a?+
|
||||
57 57 Ket
|
||||
60 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput8
|
|
@ -0,0 +1,498 @@
|
|||
# This set of tests is run only with the 8-bit library. They do not require
|
||||
# UTF-8 or Unicode property support. The file starts with all the tests of
|
||||
# the POSIX interface, because that is supported only with the 8-bit library.
|
||||
|
||||
#forbid_utf
|
||||
#pattern posix
|
||||
|
||||
/abc/
|
||||
abc
|
||||
0: abc
|
||||
*** Failers
|
||||
No match: POSIX code 17: match failed
|
||||
|
||||
/^abc|def/
|
||||
abcdef
|
||||
0: abc
|
||||
abcdef\=notbol
|
||||
0: def
|
||||
|
||||
/.*((abc)$|(def))/
|
||||
defabc
|
||||
0: defabc
|
||||
1: abc
|
||||
2: abc
|
||||
defabc\=noteol
|
||||
0: def
|
||||
1: def
|
||||
3: def
|
||||
|
||||
/the quick brown fox/
|
||||
the quick brown fox
|
||||
0: the quick brown fox
|
||||
*** Failers
|
||||
No match: POSIX code 17: match failed
|
||||
The Quick Brown Fox
|
||||
No match: POSIX code 17: match failed
|
||||
|
||||
/the quick brown fox/i
|
||||
the quick brown fox
|
||||
0: the quick brown fox
|
||||
The Quick Brown Fox
|
||||
0: The Quick Brown Fox
|
||||
|
||||
/abc.def/
|
||||
*** Failers
|
||||
No match: POSIX code 17: match failed
|
||||
abc\ndef
|
||||
No match: POSIX code 17: match failed
|
||||
|
||||
/abc$/
|
||||
abc
|
||||
0: abc
|
||||
abc\n
|
||||
0: abc
|
||||
|
||||
/(abc)\2/
|
||||
Failed: POSIX code 15: bad back reference at offset 7
|
||||
|
||||
/(abc\1)/
|
||||
abc
|
||||
No match: POSIX code 17: match failed
|
||||
|
||||
/a*(b+)(z)(z)/
|
||||
aaaabbbbzzzz
|
||||
0: aaaabbbbzz
|
||||
1: bbbb
|
||||
2: z
|
||||
3: z
|
||||
aaaabbbbzzzz\=ovector=0
|
||||
Matched without capture
|
||||
aaaabbbbzzzz\=ovector=1
|
||||
0: aaaabbbbzz
|
||||
aaaabbbbzzzz\=ovector=2
|
||||
0: aaaabbbbzz
|
||||
1: bbbb
|
||||
|
||||
/ab.cd/
|
||||
ab-cd
|
||||
0: ab-cd
|
||||
ab=cd
|
||||
0: ab=cd
|
||||
** Failers
|
||||
No match: POSIX code 17: match failed
|
||||
ab\ncd
|
||||
No match: POSIX code 17: match failed
|
||||
|
||||
/ab.cd/s
|
||||
ab-cd
|
||||
0: ab-cd
|
||||
ab=cd
|
||||
0: ab=cd
|
||||
ab\ncd
|
||||
0: ab\x0acd
|
||||
|
||||
/a(b)c/no_auto_capture
|
||||
abc
|
||||
Matched with REG_NOSUB
|
||||
|
||||
/a(?P<name>b)c/no_auto_capture
|
||||
abc
|
||||
Matched with REG_NOSUB
|
||||
|
||||
/a?|b?/
|
||||
abc
|
||||
0: a
|
||||
** Failers
|
||||
0:
|
||||
ddd\=notempty
|
||||
No match: POSIX code 17: match failed
|
||||
|
||||
/\w+A/
|
||||
CDAAAAB
|
||||
0: CDAAAA
|
||||
|
||||
/\w+A/ungreedy
|
||||
CDAAAAB
|
||||
0: CDA
|
||||
|
||||
/\Biss\B/I,aftertext
|
||||
** Ignored with POSIX interface: info
|
||||
Mississippi
|
||||
0: iss
|
||||
0+ issippi
|
||||
|
||||
/abc/\
|
||||
Failed: POSIX code 9: bad escape sequence at offset 4
|
||||
|
||||
#pattern -posix
|
||||
|
||||
# End of POSIX tests
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
0: aXb
|
||||
a\nb
|
||||
0: a\x0ab
|
||||
** Failers (too big char)
|
||||
No match
|
||||
A\x{123}B
|
||||
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
|
||||
** Truncation will probably give the wrong result.
|
||||
No match
|
||||
A\o{443}B
|
||||
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
|
||||
** Truncation will probably give the wrong result.
|
||||
No match
|
||||
|
||||
/\x{100}/I
|
||||
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/\o{400}/I
|
||||
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional leading comment
|
||||
(?: (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address
|
||||
| # or
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # one word, optionally followed by....
|
||||
(?:
|
||||
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||
\(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) | # comments, or...
|
||||
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
# quoted strings
|
||||
)*
|
||||
< (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # leading <
|
||||
(?: @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* , (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
)* # further okay, if led by comma
|
||||
: # closing colon
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* )? # optional route
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address spec
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* > # trailing >
|
||||
# name and address
|
||||
) (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional trailing comment
|
||||
/Ix
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
Options: extended
|
||||
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
|
||||
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
|
||||
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
|
||||
No last code unit
|
||||
Subject length lower bound = 3
|
||||
|
||||
/\h/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x09 \x20 \xa0
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/\H/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/\v/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/\V/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first code unit
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/\R/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Starting code units: \x0a \x0b \x0c \x0d \x85
|
||||
No last code unit
|
||||
Subject length lower bound = 1
|
||||
|
||||
/[\h]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
>\x09<
|
||||
0: \x09
|
||||
|
||||
/[\h]+/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x09 \xa0]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
>\x09\x20\xa0<
|
||||
0: \x09 \xa0
|
||||
|
||||
/[\v]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x0a-\x0d\x85]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\H]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\h]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\V]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x09\x0e-\x84\x86-\xff]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x0a\V]/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x0a\x0e-\x84\x86-\xff]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\777/I
|
||||
Failed: error 151 at offset 3: octal value is greater than \377 in 8-bit non-UTF-8 mode
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||
XX
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||
XX
|
||||
0: XX
|
||||
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
|
||||
|
||||
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||
Failed: error 177 at offset 5: character code point value in \u.... sequence is too large
|
||||
|
||||
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||
Failed: error 177 at offset 6: character code point value in \u.... sequence is too large
|
||||
|
||||
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[b-\xff] (neg){12,}+
|
||||
[\x00-a] (neg)*+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x08\x0e-\x1f!-\xff] (neg)*+
|
||||
\s*
|
||||
|
||||
[0-9A-Z_a-z]++
|
||||
\W+
|
||||
|
||||
[\x00-/:-\xff] (neg)*+
|
||||
\d
|
||||
0
|
||||
[\x00-/:-@[-^`{-\xff] (neg){4,6}+
|
||||
\w*
|
||||
A
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput9
|
Loading…
Reference in New Issue