All tests except JIT and save/reload are implemented.
This commit is contained in:
parent
e2076960d4
commit
e022475d54
481
RunTest
481
RunTest
|
@ -58,22 +58,18 @@ title5B=" and UCP support"
|
||||||
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
|
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
|
||||||
title7A="Test 7: DFA matching with UTF"
|
title7A="Test 7: DFA matching with UTF"
|
||||||
title7B=" and Unicode property support"
|
title7B=" and Unicode property support"
|
||||||
#title11="Test 11: Internal offsets and code size tests"
|
title8="Test 8: Internal offsets and code size tests"
|
||||||
|
title9="Test 9: Specials for the basic 8-bit library"
|
||||||
|
title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
|
||||||
|
title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
|
||||||
|
title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
|
||||||
|
title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
|
||||||
|
|
||||||
#title12="Test 12: JIT-specific features (when JIT is available)"
|
#title12="Test 12: JIT-specific features (when JIT is available)"
|
||||||
#title13="Test 13: JIT-specific features (when JIT is not available)"
|
#title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||||
#title14="Test 14: Specials for the basic 8-bit library"
|
|
||||||
#title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
|
||||||
#title16="Test 16: Specials for the 8-bit library with Unicode propery support"
|
|
||||||
#title17="Test 17: Specials for the basic 16/32-bit library"
|
|
||||||
#title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
|
|
||||||
#title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
|
|
||||||
#title20="Test 20: DFA specials for the basic 16/32-bit library"
|
|
||||||
#title21="Test 21: Reloads for the basic 16/32-bit library"
|
#title21="Test 21: Reloads for the basic 16/32-bit library"
|
||||||
#title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
|
#title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
|
||||||
#title23="Test 23: Specials for the 16-bit library"
|
|
||||||
#title24="Test 24: Specials for the 16-bit library with UTF-16 support"
|
|
||||||
#title25="Test 25: Specials for the 32-bit library"
|
|
||||||
#title26="Test 26: Specials for the 32-bit library with UTF-32 support"
|
|
||||||
|
|
||||||
maxtest=2
|
maxtest=2
|
||||||
|
|
||||||
|
@ -85,12 +81,12 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||||
echo $title5A $title5B
|
echo $title5A $title5B
|
||||||
echo $title6
|
echo $title6
|
||||||
echo $title7A $title7B
|
echo $title7A $title7B
|
||||||
# echo $title8
|
echo $title8
|
||||||
# echo $title9
|
echo $title9
|
||||||
# echo $title10
|
echo $title10
|
||||||
# echo $title11
|
echo $title11
|
||||||
# echo $title12
|
echo $title12
|
||||||
# echo $title13
|
echo $title13
|
||||||
# echo $title14
|
# echo $title14
|
||||||
# echo $title15
|
# echo $title15
|
||||||
# echo $title16
|
# echo $title16
|
||||||
|
@ -100,10 +96,6 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||||
# echo $title20
|
# echo $title20
|
||||||
# echo $title21
|
# echo $title21
|
||||||
# echo $title22
|
# echo $title22
|
||||||
# echo $title23
|
|
||||||
# echo $title24
|
|
||||||
# echo $title25
|
|
||||||
# echo $title26
|
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -178,12 +170,12 @@ do4=no
|
||||||
do5=no
|
do5=no
|
||||||
do6=no
|
do6=no
|
||||||
do7=no
|
do7=no
|
||||||
#do8=no
|
do8=no
|
||||||
#do9=no
|
do9=no
|
||||||
#do10=no
|
do10=no
|
||||||
#do11=no
|
do11=no
|
||||||
#do12=no
|
do12=no
|
||||||
#do13=no
|
do13=no
|
||||||
#do14=no
|
#do14=no
|
||||||
#do15=no
|
#do15=no
|
||||||
#do16=no
|
#do16=no
|
||||||
|
@ -193,10 +185,6 @@ do7=no
|
||||||
#do20=no
|
#do20=no
|
||||||
#do21=no
|
#do21=no
|
||||||
#do22=no
|
#do22=no
|
||||||
#do23=no
|
|
||||||
#do24=no
|
|
||||||
#do25=no
|
|
||||||
#do26=no
|
|
||||||
|
|
||||||
while [ $# -gt 0 ] ; do
|
while [ $# -gt 0 ] ; do
|
||||||
case $1 in
|
case $1 in
|
||||||
|
@ -207,12 +195,12 @@ while [ $# -gt 0 ] ; do
|
||||||
5) do5=yes;;
|
5) do5=yes;;
|
||||||
6) do6=yes;;
|
6) do6=yes;;
|
||||||
7) do7=yes;;
|
7) do7=yes;;
|
||||||
# 8) do8=yes;;
|
8) do8=yes;;
|
||||||
# 9) do9=yes;;
|
9) do9=yes;;
|
||||||
# 10) do10=yes;;
|
10) do10=yes;;
|
||||||
# 11) do11=yes;;
|
11) do11=yes;;
|
||||||
# 12) do12=yes;;
|
12) do12=yes;;
|
||||||
# 13) do13=yes;;
|
13) do13=yes;;
|
||||||
# 14) do14=yes;;
|
# 14) do14=yes;;
|
||||||
# 15) do15=yes;;
|
# 15) do15=yes;;
|
||||||
# 16) do16=yes;;
|
# 16) do16=yes;;
|
||||||
|
@ -222,10 +210,6 @@ while [ $# -gt 0 ] ; do
|
||||||
# 20) do20=yes;;
|
# 20) do20=yes;;
|
||||||
# 21) do21=yes;;
|
# 21) do21=yes;;
|
||||||
# 22) do22=yes;;
|
# 22) do22=yes;;
|
||||||
# 23) do23=yes;;
|
|
||||||
# 24) do24=yes;;
|
|
||||||
# 25) do25=yes;;
|
|
||||||
# 26) do26=yes;;
|
|
||||||
-8) arg8=yes;;
|
-8) arg8=yes;;
|
||||||
-16) arg16=yes;;
|
-16) arg16=yes;;
|
||||||
-32) arg32=yes;;
|
-32) arg32=yes;;
|
||||||
|
@ -330,7 +314,7 @@ else
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# UTF support always applies to all bit sizes if both are supported; we can't
|
# UTF support always applies to all bit sizes if both are supported; we can't
|
||||||
# have UTF-8 support without UTF-16 support (for example).
|
# have UTF-8 support without UTF-16 or UTF-32 support.
|
||||||
|
|
||||||
$sim ./pcre2test -C utf >/dev/null
|
$sim ./pcre2test -C utf >/dev/null
|
||||||
utf=$?
|
utf=$?
|
||||||
|
@ -346,14 +330,13 @@ fi
|
||||||
# relevant will be automatically skipped.
|
# relevant will be automatically skipped.
|
||||||
|
|
||||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||||
$do5 = no -a $do6 = no -a $do7 = no \
|
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
||||||
|
$do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
|
||||||
|
$do13 = no \
|
||||||
]; then
|
]; then
|
||||||
# -a $do8 = no -a \
|
# -a $do14 = no -a $do15 = no -a $do16 = no -a \
|
||||||
# $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
|
|
||||||
# $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
|
|
||||||
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
|
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
|
||||||
# $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
|
# $do21 = no -a $do22 = no
|
||||||
# $do25 = no -a $do26 = no
|
|
||||||
|
|
||||||
do1=yes
|
do1=yes
|
||||||
do2=yes
|
do2=yes
|
||||||
|
@ -362,12 +345,12 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||||
do5=yes
|
do5=yes
|
||||||
do6=yes
|
do6=yes
|
||||||
do7=yes
|
do7=yes
|
||||||
# do8=yes
|
do8=yes
|
||||||
# do9=yes
|
do9=yes
|
||||||
# do10=yes
|
do10=yes
|
||||||
# do11=yes
|
do11=yes
|
||||||
# do12=yes
|
do12=yes
|
||||||
# do13=yes
|
do13=yes
|
||||||
# do14=yes
|
# do14=yes
|
||||||
# do15=yes
|
# do15=yes
|
||||||
# do16=yes
|
# do16=yes
|
||||||
|
@ -377,10 +360,6 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||||
# do20=yes
|
# do20=yes
|
||||||
# do21=yes
|
# do21=yes
|
||||||
# do22=yes
|
# do22=yes
|
||||||
# do23=yes
|
|
||||||
# do24=yes
|
|
||||||
# do25=yes
|
|
||||||
# do26=yes
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||||
|
@ -584,32 +563,137 @@ if [ $do7 = yes ] ; then
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
## Test of internal offsets and code sizes. This test is run only when there
|
# Test of internal offsets and code sizes. This test is run only when there
|
||||||
## is Unicode property support and the link size is 2. The actual tests are
|
# is UTF/UCP support and the link size is 2. The actual tests are
|
||||||
## mostly the same as in some of the above, but in this test we inspect some
|
# mostly the same as in some of the above, but in this test we inspect some
|
||||||
## offsets and sizes that require a known link size. This is a doublecheck for
|
# offsets and sizes that require a known link size. This is a doublecheck for
|
||||||
## the maintainer, just in case something changes unexpectely. The output from
|
# the maintainer, just in case something changes unexpectedly. The output from
|
||||||
## this test is not the same in 8-bit and 16-bit modes.
|
# this test is different in 8-bit, 16-bit, and 32-bit modes, so there are
|
||||||
#
|
# mode-specific output files.
|
||||||
#if [ $do11 = yes ] ; then
|
|
||||||
# echo $title11
|
if [ $do8 = yes ] ; then
|
||||||
# if [ $link_size -ne 2 ] ; then
|
echo $title8
|
||||||
# echo " Skipped because link size is not 2"
|
if [ $link_size -ne 2 ] ; then
|
||||||
# elif [ $ucp -eq 0 ] ; then
|
echo " Skipped because link size is not 2"
|
||||||
# echo " Skipped because Unicode property support is not available"
|
elif [ $utf -eq 0 ] ; then
|
||||||
# else
|
echo " Skipped because UTF-$bits support is not available"
|
||||||
# for opt in "" "-s"; do
|
else
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
|
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry
|
||||||
# if [ $? = 0 ] ; then
|
if [ $? = 0 ] ; then
|
||||||
# $cf $testdata/testoutput11-$bits testtry
|
$cf $testdata/testoutput8-$bits testtry
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
# else exit 1
|
else exit 1
|
||||||
# fi
|
fi
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
echo " OK"
|
||||||
# done
|
fi
|
||||||
# fi
|
fi
|
||||||
#fi
|
|
||||||
#
|
# Tests for 8-bit-specific features
|
||||||
|
|
||||||
|
if [ "$do9" = yes ] ; then
|
||||||
|
echo $title9
|
||||||
|
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||||
|
echo " Skipped when running 16/32-bit tests"
|
||||||
|
else
|
||||||
|
for opt in "" $jitopt; do
|
||||||
|
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry
|
||||||
|
if [ $? = 0 ] ; then
|
||||||
|
$cf $testdata/testoutput9 testtry
|
||||||
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
else exit 1
|
||||||
|
fi
|
||||||
|
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||||
|
else echo " OK"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Tests for UTF-8 and UCP 8-bit-specific features
|
||||||
|
|
||||||
|
if [ "$do10" = yes ] ; then
|
||||||
|
echo $title10
|
||||||
|
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||||
|
echo " Skipped when running 16/32-bit tests"
|
||||||
|
elif [ $utf -eq 0 ] ; then
|
||||||
|
echo " Skipped because UTF-$bits support is not available"
|
||||||
|
else
|
||||||
|
for opt in "" $jitopt; do
|
||||||
|
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry
|
||||||
|
if [ $? = 0 ] ; then
|
||||||
|
$cf $testdata/testoutput10 testtry
|
||||||
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
else exit 1
|
||||||
|
fi
|
||||||
|
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||||
|
else echo " OK"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Tests for 16-bit and 32-bit features. Output is different for the two widths.
|
||||||
|
|
||||||
|
if [ $do11 = yes ] ; then
|
||||||
|
echo $title11
|
||||||
|
if [ "$bits" = "8" ] ; then
|
||||||
|
echo " Skipped when running 8-bit tests"
|
||||||
|
else
|
||||||
|
for opt in "" $jitopt; do
|
||||||
|
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
|
||||||
|
if [ $? = 0 ] ; then
|
||||||
|
$cf $testdata/testoutput11-$bits testtry
|
||||||
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
else exit 1
|
||||||
|
fi
|
||||||
|
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||||
|
else echo " OK"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output
|
||||||
|
# is different for the two widths.
|
||||||
|
|
||||||
|
if [ $do12 = yes ] ; then
|
||||||
|
echo $title12
|
||||||
|
if [ "$bits" = "8" ] ; then
|
||||||
|
echo " Skipped when running 8-bit tests"
|
||||||
|
elif [ $utf -eq 0 ] ; then
|
||||||
|
echo " Skipped because UTF-$bits support is not available"
|
||||||
|
else
|
||||||
|
for opt in "" $jitopt; do
|
||||||
|
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry
|
||||||
|
if [ $? = 0 ] ; then
|
||||||
|
$cf $testdata/testoutput12-$bits testtry
|
||||||
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
else exit 1
|
||||||
|
fi
|
||||||
|
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||||
|
else echo " OK"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Tests for 16/32-bit-specific features in DFA non-UTF modes
|
||||||
|
|
||||||
|
if [ $do13 = yes ] ; then
|
||||||
|
echo $title13
|
||||||
|
if [ "$bits" = "8" ] ; then
|
||||||
|
echo " Skipped when running 8-bit tests"
|
||||||
|
else
|
||||||
|
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
|
||||||
|
if [ $? = 0 ] ; then
|
||||||
|
$cf $testdata/testoutput13 testtry
|
||||||
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
else exit 1
|
||||||
|
fi
|
||||||
|
echo " OK"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
## Test JIT-specific features when JIT is available
|
## Test JIT-specific features when JIT is available
|
||||||
#
|
#
|
||||||
#if [ $do12 = yes ] ; then
|
#if [ $do12 = yes ] ; then
|
||||||
|
@ -644,169 +728,6 @@ fi
|
||||||
# fi
|
# fi
|
||||||
#fi
|
#fi
|
||||||
#
|
#
|
||||||
## Tests for 8-bit-specific features
|
|
||||||
#
|
|
||||||
#if [ "$do14" = yes ] ; then
|
|
||||||
# echo $title14
|
|
||||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
|
||||||
# echo " Skipped when running 16/32-bit tests"
|
|
||||||
# else
|
|
||||||
# cp -f $testdata/saved16 testsaved16
|
|
||||||
# cp -f $testdata/saved32 testsaved32
|
|
||||||
# for opt in "" "-s" $jitopt; do
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput14 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput14 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
|
||||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
|
||||||
# else echo " OK"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
## Tests for 8-bit-specific features (needs UTF-8 support)
|
|
||||||
#
|
|
||||||
#if [ "$do15" = yes ] ; then
|
|
||||||
# echo $title15
|
|
||||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
|
||||||
# echo " Skipped when running 16/32-bit tests"
|
|
||||||
# elif [ $utf -eq 0 ] ; then
|
|
||||||
# echo " Skipped because UTF-$bits support is not available"
|
|
||||||
# else
|
|
||||||
# for opt in "" "-s" $jitopt; do
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput15 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput15 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
|
||||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
|
||||||
# else echo " OK"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
## Tests for 8-bit-specific features (Unicode property support)
|
|
||||||
#
|
|
||||||
#if [ $do16 = yes ] ; then
|
|
||||||
# echo $title16
|
|
||||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
|
||||||
# echo " Skipped when running 16/32-bit tests"
|
|
||||||
# elif [ $ucp -eq 0 ] ; then
|
|
||||||
# echo " Skipped because Unicode property support is not available"
|
|
||||||
# else
|
|
||||||
# for opt in "" "-s" $jitopt; do
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput16 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput16 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
|
||||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
|
||||||
# else echo " OK"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
## Tests for 16/32-bit-specific features
|
|
||||||
#
|
|
||||||
#if [ $do17 = yes ] ; then
|
|
||||||
# echo $title17
|
|
||||||
# if [ "$bits" = "8" ] ; then
|
|
||||||
# echo " Skipped when running 8-bit tests"
|
|
||||||
# else
|
|
||||||
# for opt in "" "-s" $jitopt; do
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput17 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput17 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
|
||||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
|
||||||
# else echo " OK"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
## Tests for 16/32-bit-specific features (UTF-16/32 support)
|
|
||||||
#
|
|
||||||
#if [ $do18 = yes ] ; then
|
|
||||||
# echo $title18
|
|
||||||
# if [ "$bits" = "8" ] ; then
|
|
||||||
# echo " Skipped when running 8-bit tests"
|
|
||||||
# elif [ $utf -eq 0 ] ; then
|
|
||||||
# echo " Skipped because UTF-$bits support is not available"
|
|
||||||
# else
|
|
||||||
# for opt in "" "-s" $jitopt; do
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput18 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput18-$bits testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
|
||||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
|
||||||
# else echo " OK"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
## Tests for 16/32-bit-specific features (Unicode property support)
|
|
||||||
#
|
|
||||||
#if [ $do19 = yes ] ; then
|
|
||||||
# echo $title19
|
|
||||||
# if [ "$bits" = "8" ] ; then
|
|
||||||
# echo " Skipped when running 8-bit tests"
|
|
||||||
# elif [ $ucp -eq 0 ] ; then
|
|
||||||
# echo " Skipped because Unicode property support is not available"
|
|
||||||
# else
|
|
||||||
# for opt in "" "-s" $jitopt; do
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput19 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput19 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
|
||||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
|
||||||
# else echo " OK"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
## Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
|
|
||||||
#
|
|
||||||
#if [ $do20 = yes ] ; then
|
|
||||||
# echo $title20
|
|
||||||
# if [ "$bits" = "8" ] ; then
|
|
||||||
# echo " Skipped when running 8-bit tests"
|
|
||||||
# else
|
|
||||||
# for opt in "" "-s"; do
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput20 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput20 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
|
||||||
# else echo " OK"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
## Tests for reloads with 16/32-bit library
|
## Tests for reloads with 16/32-bit library
|
||||||
#
|
#
|
||||||
#if [ $do21 = yes ] ; then
|
#if [ $do21 = yes ] ; then
|
||||||
|
@ -855,70 +776,6 @@ fi
|
||||||
# echo " OK"
|
# echo " OK"
|
||||||
# fi
|
# fi
|
||||||
#fi
|
#fi
|
||||||
#
|
|
||||||
#if [ $do23 = yes ] ; then
|
|
||||||
# echo $title23
|
|
||||||
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
|
|
||||||
# echo " Skipped when running 8/32-bit tests"
|
|
||||||
# else
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput23 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput23 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# echo " OK"
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
#if [ $do24 = yes ] ; then
|
|
||||||
# echo $title24
|
|
||||||
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
|
|
||||||
# echo " Skipped when running 8/32-bit tests"
|
|
||||||
# elif [ $utf -eq 0 ] ; then
|
|
||||||
# echo " Skipped because UTF-$bits support is not available"
|
|
||||||
# else
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput24 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput24 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# echo " OK"
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
#if [ $do25 = yes ] ; then
|
|
||||||
# echo $title25
|
|
||||||
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
|
|
||||||
# echo " Skipped when running 8/16-bit tests"
|
|
||||||
# else
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput25 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput25 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# echo " OK"
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
#
|
|
||||||
#if [ $do26 = yes ] ; then
|
|
||||||
# echo $title26
|
|
||||||
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
|
|
||||||
# echo " Skipped when running 8/16-bit tests"
|
|
||||||
# elif [ $utf -eq 0 ] ; then
|
|
||||||
# echo " Skipped because UTF-$bits support is not available"
|
|
||||||
# else
|
|
||||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput26 testtry
|
|
||||||
# if [ $? = 0 ] ; then
|
|
||||||
# $cf $testdata/testoutput26 testtry
|
|
||||||
# if [ $? != 0 ] ; then exit 1; fi
|
|
||||||
# else exit 1
|
|
||||||
# fi
|
|
||||||
# echo " OK"
|
|
||||||
# fi
|
|
||||||
#fi
|
|
||||||
|
|
||||||
# End of loop for 8/16/32-bit tests
|
# End of loop for 8/16/32-bit tests
|
||||||
done
|
done
|
||||||
|
|
|
@ -286,9 +286,10 @@ This is a pattern line whose modifier list starts with two one-letter modifiers
|
||||||
.SH "PATTERN SYNTAX"
|
.SH "PATTERN SYNTAX"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
A pattern line must start with one of the following characters:
|
A pattern line must start with one of the following characters (common symbols,
|
||||||
|
excluding pattern meta-characters):
|
||||||
.sp
|
.sp
|
||||||
" / ! ' ` - + = : ; . ,
|
/ ! " ' ` - = _ : ; , % & @ ~
|
||||||
.sp
|
.sp
|
||||||
This is interpreted as the pattern's delimiter. A regular expression may be
|
This is interpreted as the pattern's delimiter. A regular expression may be
|
||||||
continued over several input lines, in which case the newline characters are
|
continued over several input lines, in which case the newline characters are
|
||||||
|
|
|
@ -7833,11 +7833,12 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
|
||||||
|
|
||||||
/* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
|
/* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
|
||||||
8-bit UTF mode, codepoints in the range 128-255 are introductory code
|
8-bit UTF mode, codepoints in the range 128-255 are introductory code
|
||||||
points and cannot have another case. In 16-bit and 32-bit mode, we can
|
points and cannot have another case. In 16-bit and 32-bit modes, we can
|
||||||
check wide characters when UTF (and therefore UCP) is supported. */
|
check wide characters when UTF (and therefore UCP) is supported. */
|
||||||
|
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
else if (UCD_OTHERCASE(firstcu) != firstcu)
|
else if (firstcu <= MAX_UTF_CODE_POINT &&
|
||||||
|
UCD_OTHERCASE(firstcu) != firstcu)
|
||||||
re->flags |= PCRE2_FIRSTCASELESS;
|
re->flags |= PCRE2_FIRSTCASELESS;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -7870,7 +7871,7 @@ if (reqcuflags >= 0 &&
|
||||||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||||
}
|
}
|
||||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
else if (UCD_OTHERCASE(reqcu) != reqcu)
|
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||||
re->flags |= PCRE2_LASTCASELESS;
|
re->flags |= PCRE2_LASTCASELESS;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -184,8 +184,8 @@ static const char match_error_texts[] =
|
||||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||||
/* 15 */
|
/* 15 */
|
||||||
"UTF-8 error: code point > 0x10ffff is not defined\0"
|
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
||||||
"UTF-8 error: code points 0xd000-0xdfff are not defined\0"
|
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||||
"UTF-8 error: overlong 2-byte sequence\0"
|
"UTF-8 error: overlong 2-byte sequence\0"
|
||||||
"UTF-8 error: overlong 3-byte sequence\0"
|
"UTF-8 error: overlong 3-byte sequence\0"
|
||||||
"UTF-8 error: overlong 4-byte sequence\0"
|
"UTF-8 error: overlong 4-byte sequence\0"
|
||||||
|
@ -198,8 +198,8 @@ static const char match_error_texts[] =
|
||||||
/* 25 */
|
/* 25 */
|
||||||
"UTF-16 error: invalid low surrogate\0"
|
"UTF-16 error: invalid low surrogate\0"
|
||||||
"UTF-16 error: isolated low surrogate\0"
|
"UTF-16 error: isolated low surrogate\0"
|
||||||
"UTF-32 error: surrogate character not allowed\0"
|
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||||
"UTF-32 error: code point > 0x10ffff is not defined\0"
|
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||||
"bad count value\0"
|
"bad count value\0"
|
||||||
/* 30 */
|
/* 30 */
|
||||||
"pattern compiled with other endianness\0"
|
"pattern compiled with other endianness\0"
|
||||||
|
|
|
@ -240,6 +240,10 @@ Unicode doesn't go beyond 0x0010ffff. */
|
||||||
|
|
||||||
#define NOTACHAR 0xffffffff
|
#define NOTACHAR 0xffffffff
|
||||||
|
|
||||||
|
/* This is the largest valid UTF/Unicode code point. */
|
||||||
|
|
||||||
|
#define MAX_UTF_CODE_POINT 0x10ffff
|
||||||
|
|
||||||
/* Compile-time errors are added to this value. As they are documented, it
|
/* Compile-time errors are added to this value. As they are documented, it
|
||||||
should probably never be changed. */
|
should probably never be changed. */
|
||||||
|
|
||||||
|
@ -574,9 +578,6 @@ total length. */
|
||||||
#define tables_length (ctypes_offset + 256)
|
#define tables_length (ctypes_offset + 256)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* -------------------- Character and string names ------------------------ */
|
/* -------------------- Character and string names ------------------------ */
|
||||||
|
|
||||||
/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
|
||||||
|
|
|
@ -279,9 +279,8 @@ static void
|
||||||
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR codestart, nametable, code;
|
PCRE2_SPTR codestart, nametable, code;
|
||||||
uint32_t options = re->compile_options;
|
|
||||||
uint32_t nesize = re->name_entry_size;
|
uint32_t nesize = re->name_entry_size;
|
||||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||||
|
|
||||||
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
||||||
code = codestart = nametable + re->name_count * re->name_entry_size;
|
code = codestart = nametable + re->name_count * re->name_entry_size;
|
||||||
|
|
|
@ -125,7 +125,7 @@ PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||||
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||||
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||||
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted
|
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||||
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
||||||
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
||||||
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
||||||
|
|
|
@ -147,8 +147,6 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||||
const char *message, *addmessage;
|
const char *message, *addmessage;
|
||||||
size_t length, addlength;
|
size_t length, addlength;
|
||||||
|
|
||||||
errcode -= COMPILE_ERROR_BASE;
|
|
||||||
|
|
||||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||||
"unknown error code" : pstring[errcode];
|
"unknown error code" : pstring[errcode];
|
||||||
length = strlen(message) + 1;
|
length = strlen(message) + 1;
|
||||||
|
@ -237,8 +235,8 @@ if (preg->re_pcre2_code == NULL)
|
||||||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||||
preg->re_nsub = (size_t)re_nsub;
|
preg->re_nsub = (size_t)re_nsub;
|
||||||
preg->re_match_data = ((cflags & REG_NOSUB) != 0)? NULL :
|
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
|
||||||
pcre2_match_data_create(re_nsub + 1, NULL);
|
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -497,7 +497,7 @@ static modstruct modlist[] = {
|
||||||
#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
|
#define POSIX_SUPPORTED_MATCH_OPTIONS ( \
|
||||||
PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
|
PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
|
||||||
|
|
||||||
#define POSIX_SUPPORTED_MATCH_CONTROLS ( 0 )
|
#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
|
||||||
|
|
||||||
/* Table of single-character abbreviated modifiers. The index field is
|
/* Table of single-character abbreviated modifiers. The index field is
|
||||||
initialized to -1, but the first time the modifier is encountered, it is filled
|
initialized to -1, but the first time the modifier is encountered, it is filled
|
||||||
|
@ -2884,7 +2884,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
|
||||||
((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
|
||||||
((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
||||||
((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
|
((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
|
||||||
((options & PCRE2_UTF) != 0)? " utf" : "",
|
((options & PCRE2_UTF) != 0)? " utf" : "",
|
||||||
((options & PCRE2_UCP) != 0)? " ucp" : "",
|
((options & PCRE2_UCP) != 0)? " ucp" : "",
|
||||||
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
|
||||||
|
@ -3884,7 +3884,7 @@ static int
|
||||||
callout_function(pcre2_callout_block_8 *cb)
|
callout_function(pcre2_callout_block_8 *cb)
|
||||||
{
|
{
|
||||||
uint32_t i, pre_start, post_start, subject_length;
|
uint32_t i, pre_start, post_start, subject_length;
|
||||||
BOOL utf = (FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
|
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
|
||||||
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
|
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
|
||||||
FILE *f = (first_callout || callout_capture)? outfile : NULL;
|
FILE *f = (first_callout || callout_capture)? outfile : NULL;
|
||||||
|
|
||||||
|
@ -4033,8 +4033,10 @@ dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
|
||||||
|
|
||||||
/* Initialize for scanning the data line. */
|
/* Initialize for scanning the data line. */
|
||||||
|
|
||||||
utf = (pat_patctl.control & CTL_POSIX) == 0 &&
|
utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
|
||||||
(FLD(compiled_code, compile_options) & PCRE2_UTF) != 0;
|
((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
|
||||||
|
FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
|
||||||
|
|
||||||
start_rep = NULL;
|
start_rep = NULL;
|
||||||
len = strlen((const char *)buffer);
|
len = strlen((const char *)buffer);
|
||||||
while (len > 0 && isspace(buffer[len-1])) len--;
|
while (len > 0 && isspace(buffer[len-1])) len--;
|
||||||
|
@ -4043,7 +4045,7 @@ p = buffer;
|
||||||
while (isspace(*p)) p++;
|
while (isspace(*p)) p++;
|
||||||
|
|
||||||
/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
|
/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
|
||||||
invalid input to pcre2_exec, you must use \x?? or \x{} sequences. */
|
invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
|
||||||
|
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
|
@ -4414,14 +4416,14 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "%2d: ", (int)i);
|
fprintf(outfile, "%2d: ", (int)i);
|
||||||
PCHARSV(dbuffer, pmatch[i].rm_so,
|
PCHARSV(dbuffer, pmatch[i].rm_so,
|
||||||
pmatch[i].rm_eo - pmatch[i].rm_so, FALSE, outfile);
|
pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile);
|
||||||
fprintf(outfile, "\n");
|
fprintf(outfile, "\n");
|
||||||
if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
|
if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
|
||||||
(dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
|
(dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "%2d+ ", (int)i);
|
fprintf(outfile, "%2d+ ", (int)i);
|
||||||
PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
|
PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
|
||||||
FALSE, outfile);
|
utf, outfile);
|
||||||
fprintf(outfile, "\n");
|
fprintf(outfile, "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5587,7 +5589,7 @@ while (notdone)
|
||||||
rc = process_command();
|
rc = process_command();
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (strchr("\"/!'`-+=:;.,", *p) != NULL)
|
else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
|
||||||
{
|
{
|
||||||
rc = process_pattern();
|
rc = process_pattern();
|
||||||
dfa_matched = 0;
|
dfa_matched = 0;
|
||||||
|
|
|
@ -0,0 +1,398 @@
|
||||||
|
# This set of tests is for UTF-8 support and Unicode property support, with
|
||||||
|
# relevance only for the 8-bit library.
|
||||||
|
|
||||||
|
/X(\C{3})/utf
|
||||||
|
X\x{1234}
|
||||||
|
|
||||||
|
/X(\C{4})/utf
|
||||||
|
X\x{1234}YZ
|
||||||
|
|
||||||
|
/X\C*/utf
|
||||||
|
XYZabcdce
|
||||||
|
|
||||||
|
/X\C*?/utf
|
||||||
|
XYZabcde
|
||||||
|
|
||||||
|
/X\C{3,5}/utf
|
||||||
|
Xabcdefg
|
||||||
|
X\x{1234}
|
||||||
|
X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
X\x{1234}\x{512}YZ
|
||||||
|
|
||||||
|
/X\C{3,5}?/utf
|
||||||
|
Xabcdefg
|
||||||
|
X\x{1234}
|
||||||
|
X\x{1234}YZ
|
||||||
|
X\x{1234}\x{512}
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
aXb
|
||||||
|
a\nb
|
||||||
|
|
||||||
|
/a\C\Cb/utf
|
||||||
|
a\x{100}b
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
|
||||||
|
/a\C\Cb/utf
|
||||||
|
a\x{100}b
|
||||||
|
** Failers
|
||||||
|
a\x{12257}b
|
||||||
|
|
||||||
|
/[Ã]/utf
|
||||||
|
|
||||||
|
/Ã/utf
|
||||||
|
|
||||||
|
/ÃÃÃxxx/utf
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
\xdf
|
||||||
|
\xef
|
||||||
|
\xef\x80
|
||||||
|
\xf7
|
||||||
|
\xf7\x80
|
||||||
|
\xf7\x80\x80
|
||||||
|
\xfb
|
||||||
|
\xfb\x80
|
||||||
|
\xfb\x80\x80
|
||||||
|
\xfb\x80\x80\x80
|
||||||
|
\xfd
|
||||||
|
\xfd\x80
|
||||||
|
\xfd\x80\x80
|
||||||
|
\xfd\x80\x80\x80
|
||||||
|
\xfd\x80\x80\x80\x80
|
||||||
|
\xdf\x7f
|
||||||
|
\xef\x7f\x80
|
||||||
|
\xef\x80\x7f
|
||||||
|
\xf7\x7f\x80\x80
|
||||||
|
\xf7\x80\x7f\x80
|
||||||
|
\xf7\x80\x80\x7f
|
||||||
|
\xfb\x7f\x80\x80\x80
|
||||||
|
\xfb\x80\x7f\x80\x80
|
||||||
|
\xfb\x80\x80\x7f\x80
|
||||||
|
\xfb\x80\x80\x80\x7f
|
||||||
|
\xfd\x7f\x80\x80\x80\x80
|
||||||
|
\xfd\x80\x7f\x80\x80\x80
|
||||||
|
\xfd\x80\x80\x7f\x80\x80
|
||||||
|
\xfd\x80\x80\x80\x7f\x80
|
||||||
|
\xfd\x80\x80\x80\x80\x7f
|
||||||
|
\xed\xa0\x80
|
||||||
|
\xc0\x8f
|
||||||
|
\xe0\x80\x8f
|
||||||
|
\xf0\x80\x80\x8f
|
||||||
|
\xf8\x80\x80\x80\x8f
|
||||||
|
\xfc\x80\x80\x80\x80\x8f
|
||||||
|
\x80
|
||||||
|
\xfe
|
||||||
|
\xff
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
\xfb\x80\x80\x80\x80
|
||||||
|
\xfd\x80\x80\x80\x80\x80
|
||||||
|
\xf7\xbf\xbf\xbf
|
||||||
|
|
||||||
|
/shortutf/utf
|
||||||
|
\xdf\=ph
|
||||||
|
\xef\=ph
|
||||||
|
\xef\x80\=ph
|
||||||
|
\xf7\=ph
|
||||||
|
\xf7\x80\=ph
|
||||||
|
\xf7\x80\x80\=ph
|
||||||
|
\xfb\=ph
|
||||||
|
\xfb\x80\=ph
|
||||||
|
\xfb\x80\x80\=ph
|
||||||
|
\xfb\x80\x80\x80\=ph
|
||||||
|
\xfd\=ph
|
||||||
|
\xfd\x80\=ph
|
||||||
|
\xfd\x80\x80\=ph
|
||||||
|
\xfd\x80\x80\x80\=ph
|
||||||
|
\xfd\x80\x80\x80\x80\=ph
|
||||||
|
|
||||||
|
/anything/utf
|
||||||
|
\xc0\x80
|
||||||
|
\xc1\x8f
|
||||||
|
\xe0\x9f\x80
|
||||||
|
\xf0\x8f\x80\x80
|
||||||
|
\xf8\x87\x80\x80\x80
|
||||||
|
\xfc\x83\x80\x80\x80\x80
|
||||||
|
\xfe\x80\x80\x80\x80\x80
|
||||||
|
\xff\x80\x80\x80\x80\x80
|
||||||
|
\xc3\x8f
|
||||||
|
\xe0\xaf\x80
|
||||||
|
\xe1\x80\x80
|
||||||
|
\xf0\x9f\x80\x80
|
||||||
|
\xf1\x8f\x80\x80
|
||||||
|
\xf8\x88\x80\x80\x80
|
||||||
|
\xf9\x87\x80\x80\x80
|
||||||
|
\xfc\x84\x80\x80\x80\x80
|
||||||
|
\xfd\x83\x80\x80\x80\x80
|
||||||
|
\xf8\x88\x80\x80\x80\=no_utf_check
|
||||||
|
\xf9\x87\x80\x80\x80\=no_utf_check
|
||||||
|
\xfc\x84\x80\x80\x80\x80\=no_utf_check
|
||||||
|
\xfd\x83\x80\x80\x80\x80\=no_utf_check
|
||||||
|
|
||||||
|
/\x{100}/IB,utf
|
||||||
|
|
||||||
|
/\x{1000}/IB,utf
|
||||||
|
|
||||||
|
/\x{10000}/IB,utf
|
||||||
|
|
||||||
|
/\x{100000}/IB,utf
|
||||||
|
|
||||||
|
/\x{10ffff}/IB,utf
|
||||||
|
|
||||||
|
/[\x{ff}]/IB,utf
|
||||||
|
|
||||||
|
/[\x{100}]/IB,utf
|
||||||
|
|
||||||
|
/\x80/IB,utf
|
||||||
|
|
||||||
|
/\xff/IB,utf
|
||||||
|
|
||||||
|
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
|
||||||
|
\x{D55c}\x{ad6d}\x{C5B4}
|
||||||
|
|
||||||
|
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
|
||||||
|
\x{65e5}\x{672c}\x{8a9e}
|
||||||
|
|
||||||
|
/\x{80}/IB,utf
|
||||||
|
|
||||||
|
/\x{084}/IB,utf
|
||||||
|
|
||||||
|
/\x{104}/IB,utf
|
||||||
|
|
||||||
|
/\x{861}/IB,utf
|
||||||
|
|
||||||
|
/\x{212ab}/IB,utf
|
||||||
|
|
||||||
|
# This one is here not because it's different to Perl, but because the way
|
||||||
|
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||||||
|
# can't tell the difference.)
|
||||||
|
|
||||||
|
/X(\C)(.*)/utf
|
||||||
|
X\x{1234}
|
||||||
|
X\nabc
|
||||||
|
|
||||||
|
# This one is here because Perl gives out a grumbly error message (quite
|
||||||
|
# correctly, but that messes up comparisons).
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
*** Failers
|
||||||
|
a\x{100}b
|
||||||
|
|
||||||
|
/[^ab\xC0-\xF0]/IB,utf
|
||||||
|
\x{f1}
|
||||||
|
\x{bf}
|
||||||
|
\x{100}
|
||||||
|
\x{1000}
|
||||||
|
*** Failers
|
||||||
|
\x{c0}
|
||||||
|
\x{f0}
|
||||||
|
|
||||||
|
/Ā{3,4}/IB,utf
|
||||||
|
\x{100}\x{100}\x{100}\x{100\x{100}
|
||||||
|
|
||||||
|
/(\x{100}+|x)/IB,utf
|
||||||
|
|
||||||
|
/(\x{100}*a|x)/IB,utf
|
||||||
|
|
||||||
|
/(\x{100}{0,2}a|x)/IB,utf
|
||||||
|
|
||||||
|
/(\x{100}{1,2}a|x)/IB,utf
|
||||||
|
|
||||||
|
/\x{100}/IB,utf
|
||||||
|
|
||||||
|
/a\x{100}\x{101}*/IB,utf
|
||||||
|
|
||||||
|
/a\x{100}\x{101}+/IB,utf
|
||||||
|
|
||||||
|
/[^\x{c4}]/IB
|
||||||
|
|
||||||
|
/[\x{100}]/IB,utf
|
||||||
|
\x{100}
|
||||||
|
Z\x{100}
|
||||||
|
\x{100}Z
|
||||||
|
*** Failers
|
||||||
|
|
||||||
|
/[\xff]/IB,utf
|
||||||
|
>\x{ff}<
|
||||||
|
|
||||||
|
/[^\xff]/IB,utf
|
||||||
|
|
||||||
|
/\x{100}abc(xyz(?1))/IB,utf
|
||||||
|
|
||||||
|
/a\x{1234}b/utf,posix
|
||||||
|
a\x{1234}b
|
||||||
|
|
||||||
|
/\777/I,utf
|
||||||
|
\x{1ff}
|
||||||
|
\777
|
||||||
|
|
||||||
|
/\x{100}+\x{200}/IB,utf
|
||||||
|
|
||||||
|
/\x{100}+X/IB,utf
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E/B,utf
|
||||||
|
|
||||||
|
# This tests the stricter UTF-8 check according to RFC 3629.
|
||||||
|
|
||||||
|
/X/utf
|
||||||
|
\x{d800}
|
||||||
|
\x{d800}\=no_utf_check
|
||||||
|
\x{da00}
|
||||||
|
\x{da00}\=no_utf_check
|
||||||
|
\x{dfff}
|
||||||
|
\x{dfff}\=no_utf_check
|
||||||
|
\x{110000}
|
||||||
|
\x{110000}\=no_utf_check
|
||||||
|
\x{2000000}
|
||||||
|
\x{2000000}\=no_utf_check
|
||||||
|
\x{7fffffff}
|
||||||
|
\x{7fffffff}\=no_utf_check
|
||||||
|
|
||||||
|
/(*UTF8)\x{1234}/
|
||||||
|
abcd\x{1234}pqr
|
||||||
|
|
||||||
|
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
|
||||||
|
|
||||||
|
/\h/I,utf
|
||||||
|
ABC\x{09}
|
||||||
|
ABC\x{20}
|
||||||
|
ABC\x{a0}
|
||||||
|
ABC\x{1680}
|
||||||
|
ABC\x{180e}
|
||||||
|
ABC\x{2000}
|
||||||
|
ABC\x{202f}
|
||||||
|
ABC\x{205f}
|
||||||
|
ABC\x{3000}
|
||||||
|
|
||||||
|
/\v/I,utf
|
||||||
|
ABC\x{0a}
|
||||||
|
ABC\x{0b}
|
||||||
|
ABC\x{0c}
|
||||||
|
ABC\x{0d}
|
||||||
|
ABC\x{85}
|
||||||
|
ABC\x{2028}
|
||||||
|
|
||||||
|
/\h*A/I,utf
|
||||||
|
CDBABC
|
||||||
|
|
||||||
|
/\v+A/I,utf
|
||||||
|
|
||||||
|
/\s?xxx\s/I,utf
|
||||||
|
|
||||||
|
/\sxxx\s/I,utf,tables=2
|
||||||
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
|
AB\x{a0}xxx\x{85}XYZ
|
||||||
|
|
||||||
|
/\S \S/I,utf,tables=2
|
||||||
|
\x{a2} \x{84}
|
||||||
|
A Z
|
||||||
|
|
||||||
|
/a+/utf
|
||||||
|
a\x{123}aa\=offset=1
|
||||||
|
a\x{123}aa\=offset=2
|
||||||
|
a\x{123}aa\=offset=3
|
||||||
|
a\x{123}aa\=offset=4
|
||||||
|
a\x{123}aa\=offset=5
|
||||||
|
a\x{123}aa\=offset=6
|
||||||
|
|
||||||
|
/\x{1234}+/Ii,utf
|
||||||
|
|
||||||
|
/\x{1234}+?/Ii,utf
|
||||||
|
|
||||||
|
/\x{1234}++/Ii,utf
|
||||||
|
|
||||||
|
/\x{1234}{2}/Ii,utf
|
||||||
|
|
||||||
|
/[^\x{c4}]/IB,utf
|
||||||
|
|
||||||
|
/X+\x{200}/IB,utf
|
||||||
|
|
||||||
|
/\R/I,utf
|
||||||
|
|
||||||
|
/\777/IB,utf
|
||||||
|
|
||||||
|
/\w+\x{C4}/B,utf
|
||||||
|
a\x{C4}\x{C4}
|
||||||
|
|
||||||
|
/\w+\x{C4}/B,utf,tables=2
|
||||||
|
a\x{C4}\x{C4}
|
||||||
|
|
||||||
|
/\W+\x{C4}/B,utf
|
||||||
|
!\x{C4}
|
||||||
|
|
||||||
|
/\W+\x{C4}/B,utf,tables=2
|
||||||
|
!\x{C4}
|
||||||
|
|
||||||
|
/\W+\x{A1}/B,utf
|
||||||
|
!\x{A1}
|
||||||
|
|
||||||
|
/\W+\x{A1}/B,utf,tables=2
|
||||||
|
!\x{A1}
|
||||||
|
|
||||||
|
/X\s+\x{A0}/B,utf
|
||||||
|
X\x20\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/X\s+\x{A0}/B,utf,tables=2
|
||||||
|
X\x20\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/\S+\x{A0}/B,utf
|
||||||
|
X\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/\S+\x{A0}/B,utf,tables=2
|
||||||
|
X\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/\x{a0}+\s!/B,utf
|
||||||
|
\x{a0}\x20!
|
||||||
|
|
||||||
|
/\x{a0}+\s!/B,utf,tables=2
|
||||||
|
\x{a0}\x20!
|
||||||
|
|
||||||
|
/A/utf
|
||||||
|
\x{ff000041}
|
||||||
|
\x{7f000041}
|
||||||
|
|
||||||
|
/(*UTF8)abc/never_utf
|
||||||
|
|
||||||
|
/abc/utf,never_utf
|
||||||
|
|
||||||
|
/\w/posix
|
||||||
|
+++\x{c2}
|
||||||
|
|
||||||
|
/\w/ucp,posix
|
||||||
|
+++\x{c2}
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
|
||||||
|
|
||||||
|
/AB\x{1fb0}/IB,utf
|
||||||
|
|
||||||
|
/AB\x{1fb0}/IBi,utf
|
||||||
|
|
||||||
|
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
|
||||||
|
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
|
||||||
|
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
|
||||||
|
|
||||||
|
/[ⱥ]/Bi,utf
|
||||||
|
|
||||||
|
/[^ⱥ]/Bi,utf
|
||||||
|
|
||||||
|
/\h/I
|
||||||
|
|
||||||
|
/\v/I
|
||||||
|
|
||||||
|
/\R/I
|
||||||
|
|
||||||
|
/[[:blank:]]/B,ucp
|
||||||
|
|
||||||
|
/\x{212a}+/Ii,utf
|
||||||
|
KKkk\x{212a}
|
||||||
|
|
||||||
|
/s+/Ii,utf
|
||||||
|
SSss\x{17f}
|
||||||
|
|
||||||
|
# End of testinput10
|
|
@ -0,0 +1,357 @@
|
||||||
|
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
||||||
|
# features that are not compatible with the 8-bit library, or which give
|
||||||
|
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
||||||
|
# different, so they have separate output files.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
|
||||||
|
/a\Cb/
|
||||||
|
aXb
|
||||||
|
a\nb
|
||||||
|
|
||||||
|
/[^\x{c4}]/IB
|
||||||
|
|
||||||
|
/\x{100}/I
|
||||||
|
|
||||||
|
/ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional leading comment
|
||||||
|
(?: (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address
|
||||||
|
| # or
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # one word, optionally followed by....
|
||||||
|
(?:
|
||||||
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||||
|
\(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) | # comments, or...
|
||||||
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
# quoted strings
|
||||||
|
)*
|
||||||
|
< (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # leading <
|
||||||
|
(?: @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* , (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
)* # further okay, if led by comma
|
||||||
|
: # closing colon
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* )? # optional route
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address spec
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* > # trailing >
|
||||||
|
# name and address
|
||||||
|
) (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional trailing comment
|
||||||
|
/Ix
|
||||||
|
|
||||||
|
/[\h]/B
|
||||||
|
>\x09<
|
||||||
|
|
||||||
|
/[\h]+/B
|
||||||
|
>\x09\x20\xa0<
|
||||||
|
|
||||||
|
/[\v]/B
|
||||||
|
|
||||||
|
/[^\h]/B
|
||||||
|
|
||||||
|
/\h+/I
|
||||||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||||
|
|
||||||
|
/[\h\x{dc00}]+/IB
|
||||||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||||
|
|
||||||
|
/\H+/I
|
||||||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||||
|
|
||||||
|
/[\H\x{d800}]+/
|
||||||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||||
|
|
||||||
|
/\v+/I
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/[\v\x{dc00}]+/IB
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/\V+/I
|
||||||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||||
|
|
||||||
|
/[\V\x{d800}]+/
|
||||||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||||
|
|
||||||
|
/\R+/I,bsr=unicode
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||||||
|
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||||
|
|
||||||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
||||||
|
|
||||||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
||||||
|
|
||||||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
||||||
|
|
||||||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||||
|
XX
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||||
|
XX
|
||||||
|
|
||||||
|
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
|
||||||
|
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
|
||||||
|
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
|
||||||
|
/^\x{ffff}+/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}?/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}*/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{3}/i
|
||||||
|
\x{ffff}\x{ffff}\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{0,3}/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||||
|
|
||||||
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||||
|
|
||||||
|
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
||||||
|
|
||||||
|
/^[\x{1234}\x{4321}]{2,4}?/
|
||||||
|
\x{1234}\x{1234}\x{1234}
|
||||||
|
|
||||||
|
# Check maximum non-UTF character size for the 16-bit library.
|
||||||
|
|
||||||
|
/\x{ffff}/
|
||||||
|
A\x{ffff}B
|
||||||
|
|
||||||
|
/\x{10000}/
|
||||||
|
|
||||||
|
/\o{20000}/
|
||||||
|
|
||||||
|
# Check maximum character size for the 32-bit library. These will all give
|
||||||
|
# errors in the 16-bit library.
|
||||||
|
|
||||||
|
/\x{110000}/
|
||||||
|
|
||||||
|
/\x{7fffffff}/
|
||||||
|
|
||||||
|
/\x{80000000}/
|
||||||
|
|
||||||
|
/\x{ffffffff}/
|
||||||
|
|
||||||
|
/\x{100000000}/
|
||||||
|
|
||||||
|
/\o{17777777777}/
|
||||||
|
|
||||||
|
/\o{20000000000}/
|
||||||
|
|
||||||
|
/\o{37777777777}/
|
||||||
|
|
||||||
|
/\o{40000000000}/
|
||||||
|
|
||||||
|
/\x{7fffffff}\x{7fffffff}/I
|
||||||
|
|
||||||
|
/\x{80000000}\x{80000000}/I
|
||||||
|
|
||||||
|
/\x{ffffffff}\x{ffffffff}/I
|
||||||
|
|
||||||
|
# Non-UTF characters
|
||||||
|
|
||||||
|
/\C{2,3}/
|
||||||
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||||
|
|
||||||
|
/\x{400000}\x{800000}/IBi
|
||||||
|
|
||||||
|
# Check character ranges
|
||||||
|
|
||||||
|
/[\H]/IB
|
||||||
|
|
||||||
|
/[\V]/IB
|
||||||
|
|
||||||
|
# End of testinput11
|
|
@ -0,0 +1,332 @@
|
||||||
|
# This set of tests is for UTF-16 and UTF-32 support, and is relevant only to
|
||||||
|
# the 16-bit and 32-bit libraries. The output is different for each library,
|
||||||
|
# so there are separate output files.
|
||||||
|
|
||||||
|
/ÃÃÃxxx/IB,utf,no_utf_check
|
||||||
|
|
||||||
|
/abc/utf
|
||||||
|
Ã]
|
||||||
|
|
||||||
|
/X(\C{3})/utf
|
||||||
|
X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
|
||||||
|
/X(\C{4})/utf
|
||||||
|
X\x{11234}YZ
|
||||||
|
X\x{11234}YZW
|
||||||
|
|
||||||
|
/X\C*/utf
|
||||||
|
XYZabcdce
|
||||||
|
|
||||||
|
/X\C*?/utf
|
||||||
|
XYZabcde
|
||||||
|
|
||||||
|
/X\C{3,5}/utf
|
||||||
|
Xabcdefg
|
||||||
|
X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
X\x{11234}\x{512}
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
X\x{11234}\x{512}\x{11234}Z
|
||||||
|
|
||||||
|
/X\C{3,5}?/utf
|
||||||
|
Xabcdefg
|
||||||
|
X\x{11234}Y
|
||||||
|
X\x{11234}YZ
|
||||||
|
X\x{11234}\x{512}YZ
|
||||||
|
*** Failers
|
||||||
|
X\x{11234}
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
aXb
|
||||||
|
a\nb
|
||||||
|
|
||||||
|
/a\C\Cb/utf
|
||||||
|
a\x{12257}b
|
||||||
|
a\x{12257}\x{11234}b
|
||||||
|
** Failers
|
||||||
|
a\x{100}b
|
||||||
|
|
||||||
|
/ab\Cde/utf
|
||||||
|
abXde
|
||||||
|
|
||||||
|
# Check maximum character size
|
||||||
|
|
||||||
|
/\x{ffff}/IB,utf
|
||||||
|
|
||||||
|
/\x{10000}/IB,utf
|
||||||
|
|
||||||
|
/\x{100}/IB,utf
|
||||||
|
|
||||||
|
/\x{1000}/IB,utf
|
||||||
|
|
||||||
|
/\x{10000}/IB,utf
|
||||||
|
|
||||||
|
/\x{100000}/IB,utf
|
||||||
|
|
||||||
|
/\x{10ffff}/IB,utf
|
||||||
|
|
||||||
|
/[\x{ff}]/IB,utf
|
||||||
|
|
||||||
|
/[\x{100}]/IB,utf
|
||||||
|
|
||||||
|
/\x80/IB,utf
|
||||||
|
|
||||||
|
/\xff/IB,utf
|
||||||
|
|
||||||
|
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
|
||||||
|
\x{D55c}\x{ad6d}\x{C5B4}
|
||||||
|
|
||||||
|
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
|
||||||
|
\x{65e5}\x{672c}\x{8a9e}
|
||||||
|
|
||||||
|
/\x{80}/IB,utf
|
||||||
|
|
||||||
|
/\x{084}/IB,utf
|
||||||
|
|
||||||
|
/\x{104}/IB,utf
|
||||||
|
|
||||||
|
/\x{861}/IB,utf
|
||||||
|
|
||||||
|
/\x{212ab}/IB,utf
|
||||||
|
|
||||||
|
# This one is here not because it's different to Perl, but because the way
|
||||||
|
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||||||
|
# can't tell the difference.)
|
||||||
|
|
||||||
|
/X(\C)(.*)/utf
|
||||||
|
X\x{1234}
|
||||||
|
X\nabc
|
||||||
|
|
||||||
|
# This one is here because Perl gives out a grumbly error message (quite
|
||||||
|
# correctly, but that messes up comparisons).
|
||||||
|
|
||||||
|
/a\Cb/utf
|
||||||
|
*** Failers
|
||||||
|
a\x{100}b
|
||||||
|
|
||||||
|
/[^ab\xC0-\xF0]/IB,utf
|
||||||
|
\x{f1}
|
||||||
|
\x{bf}
|
||||||
|
\x{100}
|
||||||
|
\x{1000}
|
||||||
|
*** Failers
|
||||||
|
\x{c0}
|
||||||
|
\x{f0}
|
||||||
|
|
||||||
|
/Ä€{3,4}/IB,utf
|
||||||
|
\x{100}\x{100}\x{100}\x{100\x{100}
|
||||||
|
|
||||||
|
/(\x{100}+|x)/IB,utf
|
||||||
|
|
||||||
|
/(\x{100}*a|x)/IB,utf
|
||||||
|
|
||||||
|
/(\x{100}{0,2}a|x)/IB,utf
|
||||||
|
|
||||||
|
/(\x{100}{1,2}a|x)/IB,utf
|
||||||
|
|
||||||
|
/\x{100}/IB,utf
|
||||||
|
|
||||||
|
/a\x{100}\x{101}*/IB,utf
|
||||||
|
|
||||||
|
/a\x{100}\x{101}+/IB,utf
|
||||||
|
|
||||||
|
/[^\x{c4}]/IB
|
||||||
|
|
||||||
|
/[\x{100}]/IB,utf
|
||||||
|
\x{100}
|
||||||
|
Z\x{100}
|
||||||
|
\x{100}Z
|
||||||
|
*** Failers
|
||||||
|
|
||||||
|
/[\xff]/IB,utf
|
||||||
|
>\x{ff}<
|
||||||
|
|
||||||
|
/[^\xff]/IB,utf
|
||||||
|
|
||||||
|
/\x{100}abc(xyz(?1))/IB,utf
|
||||||
|
|
||||||
|
/\777/I,utf
|
||||||
|
\x{1ff}
|
||||||
|
\777
|
||||||
|
|
||||||
|
/\x{100}+\x{200}/IB,utf
|
||||||
|
|
||||||
|
/\x{100}+X/IB,utf
|
||||||
|
|
||||||
|
/^[\QÄ€\E-\QÅ<51>\E/B,utf
|
||||||
|
|
||||||
|
/X/utf
|
||||||
|
\x{d800}
|
||||||
|
\x{d800}\=no_utf_check
|
||||||
|
\x{da00}
|
||||||
|
\x{da00}\=no_utf_check
|
||||||
|
\x{dc00}
|
||||||
|
\x{dc00}\=no_utf_check
|
||||||
|
\x{de00}
|
||||||
|
\x{de00}\=no_utf_check
|
||||||
|
\x{dfff}
|
||||||
|
\x{dfff}\=no_utf_check
|
||||||
|
\x{110000}
|
||||||
|
\x{d800}\x{1234}
|
||||||
|
|
||||||
|
/(*UTF16)\x{11234}/
|
||||||
|
abcd\x{11234}pqr
|
||||||
|
|
||||||
|
/(*UTF)\x{11234}/I
|
||||||
|
abcd\x{11234}pqr
|
||||||
|
|
||||||
|
/(*UTF-32)\x{11234}/
|
||||||
|
abcd\x{11234}pqr
|
||||||
|
|
||||||
|
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
|
||||||
|
|
||||||
|
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
|
||||||
|
|
||||||
|
/\h/I,utf
|
||||||
|
ABC\x{09}
|
||||||
|
ABC\x{20}
|
||||||
|
ABC\x{a0}
|
||||||
|
ABC\x{1680}
|
||||||
|
ABC\x{180e}
|
||||||
|
ABC\x{2000}
|
||||||
|
ABC\x{202f}
|
||||||
|
ABC\x{205f}
|
||||||
|
ABC\x{3000}
|
||||||
|
|
||||||
|
/\v/I,utf
|
||||||
|
ABC\x{0a}
|
||||||
|
ABC\x{0b}
|
||||||
|
ABC\x{0c}
|
||||||
|
ABC\x{0d}
|
||||||
|
ABC\x{85}
|
||||||
|
ABC\x{2028}
|
||||||
|
|
||||||
|
/\h*A/I,utf
|
||||||
|
CDBABC
|
||||||
|
\x{2000}ABC
|
||||||
|
|
||||||
|
/\R*A/I,bsr=unicode,utf
|
||||||
|
CDBABC
|
||||||
|
\x{2028}A
|
||||||
|
|
||||||
|
/\v+A/I,utf
|
||||||
|
|
||||||
|
/\s?xxx\s/I,utf
|
||||||
|
|
||||||
|
/\sxxx\s/I,utf,tables=2
|
||||||
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
|
AB\x{a0}xxx\x{85}XYZ
|
||||||
|
|
||||||
|
/\S \S/I,utf,tables=2
|
||||||
|
\x{a2} \x{84}
|
||||||
|
A Z
|
||||||
|
|
||||||
|
/a+/utf
|
||||||
|
a\x{123}aa\=offset=1
|
||||||
|
a\x{123}aa\=offset=2
|
||||||
|
a\x{123}aa\=offset=3
|
||||||
|
a\x{123}aa\=offset=4
|
||||||
|
a\x{123}aa\=offset=5
|
||||||
|
a\x{123}aa\=offset=6
|
||||||
|
|
||||||
|
/\x{1234}+/Ii,utf
|
||||||
|
|
||||||
|
/\x{1234}+?/Ii,utf
|
||||||
|
|
||||||
|
/\x{1234}++/Ii,utf
|
||||||
|
|
||||||
|
/\x{1234}{2}/Ii,utf
|
||||||
|
|
||||||
|
/[^\x{c4}]/IB,utf
|
||||||
|
|
||||||
|
/X+\x{200}/IB,utf
|
||||||
|
|
||||||
|
/\R/I,utf
|
||||||
|
|
||||||
|
# Check bad offset
|
||||||
|
|
||||||
|
/a/utf
|
||||||
|
\x{10000}\=offset=1
|
||||||
|
\x{10000}ab\=offset=1
|
||||||
|
\x{10000}ab\=offset=2
|
||||||
|
\x{10000}ab\=offset=3
|
||||||
|
\x{10000}ab\=offset=4
|
||||||
|
\x{10000}ab\=offset=5
|
||||||
|
|
||||||
|
/í¼€/utf
|
||||||
|
|
||||||
|
/\w+\x{C4}/B,utf
|
||||||
|
a\x{C4}\x{C4}
|
||||||
|
|
||||||
|
/\w+\x{C4}/B,utf,tables=2
|
||||||
|
a\x{C4}\x{C4}
|
||||||
|
|
||||||
|
/\W+\x{C4}/B,utf
|
||||||
|
!\x{C4}
|
||||||
|
|
||||||
|
/\W+\x{C4}/B,utf,tables=2
|
||||||
|
!\x{C4}
|
||||||
|
|
||||||
|
/\W+\x{A1}/B,utf
|
||||||
|
!\x{A1}
|
||||||
|
|
||||||
|
/\W+\x{A1}/B,utf,tables=2
|
||||||
|
!\x{A1}
|
||||||
|
|
||||||
|
/X\s+\x{A0}/B,utf
|
||||||
|
X\x20\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/X\s+\x{A0}/B,utf,tables=2
|
||||||
|
X\x20\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/\S+\x{A0}/B,utf
|
||||||
|
X\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/\S+\x{A0}/B,utf,tables=2
|
||||||
|
X\x{A0}\x{A0}
|
||||||
|
|
||||||
|
/\x{a0}+\s!/B,utf
|
||||||
|
\x{a0}\x20!
|
||||||
|
|
||||||
|
/\x{a0}+\s!/B,utf,tables=2
|
||||||
|
\x{a0}\x20!
|
||||||
|
|
||||||
|
/(*UTF)abc/never_utf
|
||||||
|
|
||||||
|
/abc/utf,never_utf
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
|
||||||
|
|
||||||
|
/AB\x{1fb0}/IB,utf
|
||||||
|
|
||||||
|
/AB\x{1fb0}/IBi,utf
|
||||||
|
|
||||||
|
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
|
||||||
|
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
|
||||||
|
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
|
||||||
|
|
||||||
|
/[â±¥]/Bi,utf
|
||||||
|
|
||||||
|
/[^â±¥]/Bi,utf
|
||||||
|
|
||||||
|
/[[:blank:]]/B,ucp
|
||||||
|
|
||||||
|
/\x{212a}+/Ii,utf
|
||||||
|
KKkk\x{212a}
|
||||||
|
|
||||||
|
/s+/Ii,utf
|
||||||
|
SSss\x{17f}
|
||||||
|
|
||||||
|
# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
|
||||||
|
|
||||||
|
/\x{110000}/utf
|
||||||
|
|
||||||
|
/\o{4200000}/utf
|
||||||
|
|
||||||
|
/\C/utf
|
||||||
|
\x{110000}
|
||||||
|
|
||||||
|
# End of testinput12
|
|
@ -0,0 +1,22 @@
|
||||||
|
# These DFA tests are for the handling of characters greater than 255 in
|
||||||
|
# 16-bit or 32-bit, non-UTF mode.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
#subject dfa
|
||||||
|
|
||||||
|
/^\x{ffff}+/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}?/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}*/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{3}/i
|
||||||
|
\x{ffff}\x{ffff}\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{0,3}/i
|
||||||
|
\x{ffff}
|
||||||
|
|
||||||
|
# End of testinput13
|
|
@ -1593,7 +1593,7 @@ a random value. /Ix
|
||||||
abc\rdef
|
abc\rdef
|
||||||
abc\r\ndef
|
abc\r\ndef
|
||||||
|
|
||||||
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
|
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
||||||
/* this is a C style comment */\=find_limits
|
/* this is a C style comment */\=find_limits
|
||||||
|
|
||||||
/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I
|
/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I
|
||||||
|
|
|
@ -573,11 +573,11 @@
|
||||||
/X\W{3}X/utf
|
/X\W{3}X/utf
|
||||||
X\=ps
|
X\=ps
|
||||||
|
|
||||||
/\sxxx\s/utf,tables=1
|
/\sxxx\s/utf,tables=2
|
||||||
AB\x{85}xxx\x{a0}XYZ
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
AB\x{a0}xxx\x{85}XYZ
|
AB\x{a0}xxx\x{85}XYZ
|
||||||
|
|
||||||
/\S \S/utf,tables=1
|
/\S \S/utf,tables=2
|
||||||
\x{a2} \x{84}
|
\x{a2} \x{84}
|
||||||
|
|
||||||
'A#хц'Bx,newline=any,utf
|
'A#хц'Bx,newline=any,utf
|
||||||
|
|
|
@ -0,0 +1,141 @@
|
||||||
|
# These are a few representative patterns whose lengths and offsets are to be
|
||||||
|
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||||
|
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||||
|
# are all themselves checked in other tests. Unicode, including property
|
||||||
|
# support, is required for these tests.
|
||||||
|
|
||||||
|
#pattern fullbincode,memory
|
||||||
|
|
||||||
|
/((?i)b)/
|
||||||
|
|
||||||
|
/(?s)(.*X|^B)/
|
||||||
|
|
||||||
|
/(?s:.*X|^B)/
|
||||||
|
|
||||||
|
/^[[:alnum:]]/
|
||||||
|
|
||||||
|
/#/Ix
|
||||||
|
|
||||||
|
/a#/Ix
|
||||||
|
|
||||||
|
/x?+/
|
||||||
|
|
||||||
|
/x++/
|
||||||
|
|
||||||
|
/x{1,3}+/
|
||||||
|
|
||||||
|
/(x)*+/
|
||||||
|
|
||||||
|
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||||
|
|
||||||
|
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
|
||||||
|
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
|
||||||
|
/(a(?1)b)/
|
||||||
|
|
||||||
|
/(a(?1)+b)/
|
||||||
|
|
||||||
|
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||||
|
|
||||||
|
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||||
|
|
||||||
|
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||||
|
|
||||||
|
/abc(?C255)de(?C)f/
|
||||||
|
|
||||||
|
/abcde/auto_callout
|
||||||
|
|
||||||
|
/\x{100}/utf
|
||||||
|
|
||||||
|
/\x{1000}/utf
|
||||||
|
|
||||||
|
/\x{10000}/utf
|
||||||
|
|
||||||
|
/\x{100000}/utf
|
||||||
|
|
||||||
|
/\x{10ffff}/utf
|
||||||
|
|
||||||
|
/\x{110000}/utf
|
||||||
|
|
||||||
|
/[\x{ff}]/utf
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
|
||||||
|
/\x80/utf
|
||||||
|
|
||||||
|
/\xff/utf
|
||||||
|
|
||||||
|
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||||
|
|
||||||
|
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||||
|
|
||||||
|
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
|
||||||
|
/[Z\x{100}]/utf
|
||||||
|
|
||||||
|
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E]/utf
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E/utf
|
||||||
|
|
||||||
|
/[\p{L}]/
|
||||||
|
|
||||||
|
/[\p{^L}]/
|
||||||
|
|
||||||
|
/[\P{L}]/
|
||||||
|
|
||||||
|
/[\P{^L}]/
|
||||||
|
|
||||||
|
/[abc\p{L}\x{0660}]/utf
|
||||||
|
|
||||||
|
/[\p{Nd}]/utf
|
||||||
|
|
||||||
|
/[\p{Nd}+-]+/utf
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||||
|
|
||||||
|
/[\x{105}-\x{109}]/i,utf
|
||||||
|
|
||||||
|
/( ( (?(1)0|) )* )/x
|
||||||
|
|
||||||
|
/( (?(1)0|)* )/x
|
||||||
|
|
||||||
|
/[a]/
|
||||||
|
|
||||||
|
/[a]/utf
|
||||||
|
|
||||||
|
/[\xaa]/
|
||||||
|
|
||||||
|
/[\xaa]/utf
|
||||||
|
|
||||||
|
/[^a]/
|
||||||
|
|
||||||
|
/[^a]/utf
|
||||||
|
|
||||||
|
/[^\xaa]/
|
||||||
|
|
||||||
|
/[^\xaa]/utf
|
||||||
|
|
||||||
|
#pattern -memory
|
||||||
|
|
||||||
|
/[^\d]/utf,ucp
|
||||||
|
|
||||||
|
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||||
|
|
||||||
|
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||||
|
|
||||||
|
/[[:alpha:]]+/utf,ucp
|
||||||
|
|
||||||
|
/[[:^alpha:]\S]+/utf,ucp
|
||||||
|
|
||||||
|
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||||
|
|
||||||
|
/(((a\2)|(a*)\g<-1>))*a?/
|
||||||
|
|
||||||
|
# End of testinput8
|
|
@ -0,0 +1,333 @@
|
||||||
|
# This set of tests is run only with the 8-bit library. They do not require
|
||||||
|
# UTF-8 or Unicode property support. The file starts with all the tests of
|
||||||
|
# the POSIX interface, because that is supported only with the 8-bit library.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
#pattern posix
|
||||||
|
|
||||||
|
/abc/
|
||||||
|
abc
|
||||||
|
*** Failers
|
||||||
|
|
||||||
|
/^abc|def/
|
||||||
|
abcdef
|
||||||
|
abcdef\=notbol
|
||||||
|
|
||||||
|
/.*((abc)$|(def))/
|
||||||
|
defabc
|
||||||
|
defabc\=noteol
|
||||||
|
|
||||||
|
/the quick brown fox/
|
||||||
|
the quick brown fox
|
||||||
|
*** Failers
|
||||||
|
The Quick Brown Fox
|
||||||
|
|
||||||
|
/the quick brown fox/i
|
||||||
|
the quick brown fox
|
||||||
|
The Quick Brown Fox
|
||||||
|
|
||||||
|
/abc.def/
|
||||||
|
*** Failers
|
||||||
|
abc\ndef
|
||||||
|
|
||||||
|
/abc$/
|
||||||
|
abc
|
||||||
|
abc\n
|
||||||
|
|
||||||
|
/(abc)\2/
|
||||||
|
|
||||||
|
/(abc\1)/
|
||||||
|
abc
|
||||||
|
|
||||||
|
/a*(b+)(z)(z)/
|
||||||
|
aaaabbbbzzzz
|
||||||
|
aaaabbbbzzzz\=ovector=0
|
||||||
|
aaaabbbbzzzz\=ovector=1
|
||||||
|
aaaabbbbzzzz\=ovector=2
|
||||||
|
|
||||||
|
/ab.cd/
|
||||||
|
ab-cd
|
||||||
|
ab=cd
|
||||||
|
** Failers
|
||||||
|
ab\ncd
|
||||||
|
|
||||||
|
/ab.cd/s
|
||||||
|
ab-cd
|
||||||
|
ab=cd
|
||||||
|
ab\ncd
|
||||||
|
|
||||||
|
/a(b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
|
||||||
|
/a(?P<name>b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
|
||||||
|
/a?|b?/
|
||||||
|
abc
|
||||||
|
** Failers
|
||||||
|
ddd\=notempty
|
||||||
|
|
||||||
|
/\w+A/
|
||||||
|
CDAAAAB
|
||||||
|
|
||||||
|
/\w+A/ungreedy
|
||||||
|
CDAAAAB
|
||||||
|
|
||||||
|
/\Biss\B/I,aftertext
|
||||||
|
Mississippi
|
||||||
|
|
||||||
|
/abc/\
|
||||||
|
|
||||||
|
#pattern -posix
|
||||||
|
|
||||||
|
# End of POSIX tests
|
||||||
|
|
||||||
|
/a\Cb/
|
||||||
|
aXb
|
||||||
|
a\nb
|
||||||
|
** Failers (too big char)
|
||||||
|
A\x{123}B
|
||||||
|
A\o{443}B
|
||||||
|
|
||||||
|
/\x{100}/I
|
||||||
|
|
||||||
|
/\o{400}/I
|
||||||
|
|
||||||
|
/ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional leading comment
|
||||||
|
(?: (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address
|
||||||
|
| # or
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # one word, optionally followed by....
|
||||||
|
(?:
|
||||||
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||||
|
\(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) | # comments, or...
|
||||||
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
# quoted strings
|
||||||
|
)*
|
||||||
|
< (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # leading <
|
||||||
|
(?: @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* , (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
)* # further okay, if led by comma
|
||||||
|
: # closing colon
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* )? # optional route
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address spec
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* > # trailing >
|
||||||
|
# name and address
|
||||||
|
) (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional trailing comment
|
||||||
|
/Ix
|
||||||
|
|
||||||
|
/\h/I
|
||||||
|
|
||||||
|
/\H/I
|
||||||
|
|
||||||
|
/\v/I
|
||||||
|
|
||||||
|
/\V/I
|
||||||
|
|
||||||
|
/\R/I
|
||||||
|
|
||||||
|
/[\h]/B
|
||||||
|
>\x09<
|
||||||
|
|
||||||
|
/[\h]+/B
|
||||||
|
>\x09\x20\xa0<
|
||||||
|
|
||||||
|
/[\v]/B
|
||||||
|
|
||||||
|
/[\H]/B
|
||||||
|
|
||||||
|
/[^\h]/B
|
||||||
|
|
||||||
|
/[\V]/B
|
||||||
|
|
||||||
|
/[\x0a\V]/B
|
||||||
|
|
||||||
|
/\777/I
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||||
|
XX
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||||
|
XX
|
||||||
|
|
||||||
|
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||||
|
|
||||||
|
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||||
|
|
||||||
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||||
|
|
||||||
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||||
|
|
||||||
|
# End of testinput9
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,675 @@
|
||||||
|
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
||||||
|
# features that are not compatible with the 8-bit library, or which give
|
||||||
|
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
||||||
|
# different, so they have separate output files.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
|
||||||
|
/a\Cb/
|
||||||
|
aXb
|
||||||
|
0: aXb
|
||||||
|
a\nb
|
||||||
|
0: a\x0ab
|
||||||
|
|
||||||
|
/[^\x{c4}]/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x{c4}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/\x{100}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
First code unit = \x{100}
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional leading comment
|
||||||
|
(?: (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address
|
||||||
|
| # or
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # one word, optionally followed by....
|
||||||
|
(?:
|
||||||
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||||
|
\(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) | # comments, or...
|
||||||
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
# quoted strings
|
||||||
|
)*
|
||||||
|
< (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # leading <
|
||||||
|
(?: @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* , (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
)* # further okay, if led by comma
|
||||||
|
: # closing colon
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* )? # optional route
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address spec
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* > # trailing >
|
||||||
|
# name and address
|
||||||
|
) (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional trailing comment
|
||||||
|
/Ix
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Contains explicit CR or LF match
|
||||||
|
Options: extended
|
||||||
|
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
|
||||||
|
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
|
||||||
|
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/[\h]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
>\x09<
|
||||||
|
0: \x09
|
||||||
|
|
||||||
|
/[\h]+/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
>\x09\x20\xa0<
|
||||||
|
0: \x09 \xa0
|
||||||
|
|
||||||
|
/[\v]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x0a-\x0d\x85\x{2028}-\x{2029}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\h]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\h+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x09 \x20 \xa0 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||||
|
0: \x{200a}\xa0\x{2000}
|
||||||
|
|
||||||
|
/[\h\x{dc00}]+/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x09 \x20 \xa0 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||||
|
0: \x{200a}\xa0\x{2000}
|
||||||
|
|
||||||
|
/\H+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||||
|
0: \x{1fff}\x{200b}
|
||||||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||||
|
0: \x9f\xa1\x{2fff}\x{3001}
|
||||||
|
|
||||||
|
/[\H\x{d800}]+/
|
||||||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||||
|
0: \x{1fff}\x{200b}
|
||||||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||||
|
0: \x9f\xa1\x{2fff}\x{3001}
|
||||||
|
|
||||||
|
/\v+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
0: \x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
0: \x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/[\v\x{dc00}]+/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
0: \x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
0: \x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/\V+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||||
|
0: \x{2027}\x{2030}
|
||||||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||||
|
0: \x09\x0e\x84\x86
|
||||||
|
|
||||||
|
/[\V\x{d800}]+/
|
||||||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||||
|
0: \x{2027}\x{2030}
|
||||||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||||
|
0: \x09\x0e\x84\x86
|
||||||
|
|
||||||
|
/\R+/I,bsr=unicode
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
0: \x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
0: \x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
First code unit = \x{d800}
|
||||||
|
Last code unit = \x{dd00}
|
||||||
|
Subject length lower bound = 6
|
||||||
|
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||||
|
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||||
|
|
||||||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x{80}]
|
||||||
|
[^\x{ff}]
|
||||||
|
[^\x{100}]
|
||||||
|
[^\x{1000}]
|
||||||
|
[^\x{ffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i [^\x{80}]
|
||||||
|
/i [^\x{ff}]
|
||||||
|
/i [^\x{100}]
|
||||||
|
/i [^\x{1000}]
|
||||||
|
/i [^\x{ffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x{100}]*
|
||||||
|
[^\x{1000}]+
|
||||||
|
[^\x{ffff}]??
|
||||||
|
[^\x{8000}]{4}
|
||||||
|
[^\x{8000}]*
|
||||||
|
[^\x{7fff}]{2}
|
||||||
|
[^\x{7fff}]{0,7}?
|
||||||
|
[^\x{100}]{5}
|
||||||
|
[^\x{100}]?+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i [^\x{100}]*
|
||||||
|
/i [^\x{1000}]+
|
||||||
|
/i [^\x{ffff}]??
|
||||||
|
/i [^\x{8000}]{4}
|
||||||
|
/i [^\x{8000}]*
|
||||||
|
/i [^\x{7fff}]{2}
|
||||||
|
/i [^\x{7fff}]{0,7}?
|
||||||
|
/i [^\x{100}]{5}
|
||||||
|
/i [^\x{100}]?+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||||
|
XX
|
||||||
|
0: XX
|
||||||
|
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||||
|
XX
|
||||||
|
0: XX
|
||||||
|
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
|
||||||
|
|
||||||
|
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\x{100}
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x{100}-\x{200}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\x{d800}
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^\x{ffff}+/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}?/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}*/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{3}/i
|
||||||
|
\x{ffff}\x{ffff}\x{ffff}
|
||||||
|
0: \x{ffff}\x{ffff}\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{0,3}/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[b-\xff] (neg){12,}
|
||||||
|
[\x00-a] (neg)*+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
|
||||||
|
\s*
|
||||||
|
|
||||||
|
[0-9A-Z_a-z]++
|
||||||
|
\W+
|
||||||
|
|
||||||
|
[\x00-/:-\xff] (neg)*?
|
||||||
|
\d
|
||||||
|
0
|
||||||
|
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
|
||||||
|
\w*
|
||||||
|
A
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
a*
|
||||||
|
[b-\xff\x{100}-\x{200}]?+
|
||||||
|
a#
|
||||||
|
a*+
|
||||||
|
[b-\xff\x{100}-\x{200}]?
|
||||||
|
b#
|
||||||
|
[a-f]*+
|
||||||
|
[g-\xff\x{100}-\x{200}]*+
|
||||||
|
#
|
||||||
|
[g-\xff\x{100}-\x{200}]*+
|
||||||
|
[a-c]*+
|
||||||
|
#
|
||||||
|
[g-\xff\x{100}-\x{200}]*
|
||||||
|
[a-h]*+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\x{1234}\x{4321}]{2,4}?/
|
||||||
|
\x{1234}\x{1234}\x{1234}
|
||||||
|
0: \x{1234}\x{1234}
|
||||||
|
|
||||||
|
# Check maximum non-UTF character size for the 16-bit library.
|
||||||
|
|
||||||
|
/\x{ffff}/
|
||||||
|
A\x{ffff}B
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/\x{10000}/
|
||||||
|
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\o{20000}/
|
||||||
|
|
||||||
|
# Check maximum character size for the 32-bit library. These will all give
|
||||||
|
# errors in the 16-bit library.
|
||||||
|
|
||||||
|
/\x{110000}/
|
||||||
|
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{7fffffff}/
|
||||||
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{80000000}/
|
||||||
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{ffffffff}/
|
||||||
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{100000000}/
|
||||||
|
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\o{17777777777}/
|
||||||
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\o{20000000000}/
|
||||||
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\o{37777777777}/
|
||||||
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\o{40000000000}/
|
||||||
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{7fffffff}\x{7fffffff}/I
|
||||||
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{80000000}\x{80000000}/I
|
||||||
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{ffffffff}\x{ffffffff}/I
|
||||||
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
# Non-UTF characters
|
||||||
|
|
||||||
|
/\C{2,3}/
|
||||||
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||||
|
** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||||
|
** Truncation will probably give the wrong result.
|
||||||
|
** Character \x{400001} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||||
|
** Truncation will probably give the wrong result.
|
||||||
|
** Character \x{400002} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||||
|
** Truncation will probably give the wrong result.
|
||||||
|
** Character \x{400003} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||||
|
** Truncation will probably give the wrong result.
|
||||||
|
0: \x00\x01\x02
|
||||||
|
|
||||||
|
/\x{400000}\x{800000}/IBi
|
||||||
|
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
# Check character ranges
|
||||||
|
|
||||||
|
/[\H]/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
|
||||||
|
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
|
||||||
|
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
|
||||||
|
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
|
||||||
|
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
|
||||||
|
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
|
||||||
|
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
|
||||||
|
\x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
|
||||||
|
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
|
||||||
|
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
|
||||||
|
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
|
||||||
|
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
|
||||||
|
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
|
||||||
|
\xfa \xfb \xfc \xfd \xfe \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/[\V]/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
|
||||||
|
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
||||||
|
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
|
||||||
|
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
|
||||||
|
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
|
||||||
|
\x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
|
||||||
|
\x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
|
||||||
|
\xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
|
||||||
|
\xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
|
||||||
|
\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
|
||||||
|
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
|
||||||
|
\xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
|
||||||
|
\xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
|
||||||
|
\xfc \xfd \xfe \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
# End of testinput11
|
|
@ -0,0 +1,681 @@
|
||||||
|
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
||||||
|
# features that are not compatible with the 8-bit library, or which give
|
||||||
|
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
||||||
|
# different, so they have separate output files.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
|
||||||
|
/a\Cb/
|
||||||
|
aXb
|
||||||
|
0: aXb
|
||||||
|
a\nb
|
||||||
|
0: a\x0ab
|
||||||
|
|
||||||
|
/[^\x{c4}]/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x{c4}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/\x{100}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
First code unit = \x{100}
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional leading comment
|
||||||
|
(?: (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address
|
||||||
|
| # or
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # one word, optionally followed by....
|
||||||
|
(?:
|
||||||
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||||
|
\(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) | # comments, or...
|
||||||
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
# quoted strings
|
||||||
|
)*
|
||||||
|
< (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # leading <
|
||||||
|
(?: @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* , (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
)* # further okay, if led by comma
|
||||||
|
: # closing colon
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* )? # optional route
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address spec
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* > # trailing >
|
||||||
|
# name and address
|
||||||
|
) (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional trailing comment
|
||||||
|
/Ix
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Contains explicit CR or LF match
|
||||||
|
Options: extended
|
||||||
|
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
|
||||||
|
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
|
||||||
|
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/[\h]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
>\x09<
|
||||||
|
0: \x09
|
||||||
|
|
||||||
|
/[\h]+/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
>\x09\x20\xa0<
|
||||||
|
0: \x09 \xa0
|
||||||
|
|
||||||
|
/[\v]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x0a-\x0d\x85\x{2028}-\x{2029}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\h]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\h+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x09 \x20 \xa0 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||||
|
0: \x{200a}\xa0\x{2000}
|
||||||
|
|
||||||
|
/[\h\x{dc00}]+/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x09 \x20 \xa0 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
||||||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||||
|
0: \x{200a}\xa0\x{2000}
|
||||||
|
|
||||||
|
/\H+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||||
|
0: \x{1fff}\x{200b}
|
||||||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||||
|
0: \x9f\xa1\x{2fff}\x{3001}
|
||||||
|
|
||||||
|
/[\H\x{d800}]+/
|
||||||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
||||||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||||
|
0: \x{1fff}\x{200b}
|
||||||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
||||||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||||
|
0: \x9f\xa1\x{2fff}\x{3001}
|
||||||
|
|
||||||
|
/\v+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
0: \x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
0: \x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/[\v\x{dc00}]+/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
0: \x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
0: \x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/\V+/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||||
|
0: \x{2027}\x{2030}
|
||||||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||||
|
0: \x09\x0e\x84\x86
|
||||||
|
|
||||||
|
/[\V\x{d800}]+/
|
||||||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||||
|
0: \x{2027}\x{2030}
|
||||||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||||
|
0: \x09\x0e\x84\x86
|
||||||
|
|
||||||
|
/\R+/I,bsr=unicode
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||||
|
0: \x{2028}\x{2029}
|
||||||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||||
|
0: \x85\x0a\x0b\x0c\x0d
|
||||||
|
|
||||||
|
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
First code unit = \x{d800}
|
||||||
|
Last code unit = \x{dd00}
|
||||||
|
Subject length lower bound = 6
|
||||||
|
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||||
|
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||||
|
|
||||||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x{80}]
|
||||||
|
[^\x{ff}]
|
||||||
|
[^\x{100}]
|
||||||
|
[^\x{1000}]
|
||||||
|
[^\x{ffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i [^\x{80}]
|
||||||
|
/i [^\x{ff}]
|
||||||
|
/i [^\x{100}]
|
||||||
|
/i [^\x{1000}]
|
||||||
|
/i [^\x{ffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[^\x{100}]*
|
||||||
|
[^\x{1000}]+
|
||||||
|
[^\x{ffff}]??
|
||||||
|
[^\x{8000}]{4}
|
||||||
|
[^\x{8000}]*
|
||||||
|
[^\x{7fff}]{2}
|
||||||
|
[^\x{7fff}]{0,7}?
|
||||||
|
[^\x{100}]{5}
|
||||||
|
[^\x{100}]?+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i [^\x{100}]*
|
||||||
|
/i [^\x{1000}]+
|
||||||
|
/i [^\x{ffff}]??
|
||||||
|
/i [^\x{8000}]{4}
|
||||||
|
/i [^\x{8000}]*
|
||||||
|
/i [^\x{7fff}]{2}
|
||||||
|
/i [^\x{7fff}]{0,7}?
|
||||||
|
/i [^\x{100}]{5}
|
||||||
|
/i [^\x{100}]?+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||||
|
XX
|
||||||
|
0: XX
|
||||||
|
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||||
|
XX
|
||||||
|
0: XX
|
||||||
|
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
|
||||||
|
|
||||||
|
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\x{100}
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x{100}-\x{200}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
\x{d800}
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^\x{ffff}+/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}?/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}*/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{3}/i
|
||||||
|
\x{ffff}\x{ffff}\x{ffff}
|
||||||
|
0: \x{ffff}\x{ffff}\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{0,3}/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[b-\xff] (neg){12,}
|
||||||
|
[\x00-a] (neg)*+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
|
||||||
|
\s*
|
||||||
|
|
||||||
|
[0-9A-Z_a-z]++
|
||||||
|
\W+
|
||||||
|
|
||||||
|
[\x00-/:-\xff] (neg)*?
|
||||||
|
\d
|
||||||
|
0
|
||||||
|
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
|
||||||
|
\w*
|
||||||
|
A
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
a*
|
||||||
|
[b-\xff\x{100}-\x{200}]?+
|
||||||
|
a#
|
||||||
|
a*+
|
||||||
|
[b-\xff\x{100}-\x{200}]?
|
||||||
|
b#
|
||||||
|
[a-f]*+
|
||||||
|
[g-\xff\x{100}-\x{200}]*+
|
||||||
|
#
|
||||||
|
[g-\xff\x{100}-\x{200}]*+
|
||||||
|
[a-c]*+
|
||||||
|
#
|
||||||
|
[g-\xff\x{100}-\x{200}]*
|
||||||
|
[a-h]*+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\x{1234}\x{4321}]{2,4}?/
|
||||||
|
\x{1234}\x{1234}\x{1234}
|
||||||
|
0: \x{1234}\x{1234}
|
||||||
|
|
||||||
|
# Check maximum non-UTF character size for the 16-bit library.
|
||||||
|
|
||||||
|
/\x{ffff}/
|
||||||
|
A\x{ffff}B
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/\x{10000}/
|
||||||
|
|
||||||
|
/\o{20000}/
|
||||||
|
|
||||||
|
# Check maximum character size for the 32-bit library. These will all give
|
||||||
|
# errors in the 16-bit library.
|
||||||
|
|
||||||
|
/\x{110000}/
|
||||||
|
|
||||||
|
/\x{7fffffff}/
|
||||||
|
|
||||||
|
/\x{80000000}/
|
||||||
|
|
||||||
|
/\x{ffffffff}/
|
||||||
|
|
||||||
|
/\x{100000000}/
|
||||||
|
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\o{17777777777}/
|
||||||
|
|
||||||
|
/\o{20000000000}/
|
||||||
|
|
||||||
|
/\o{37777777777}/
|
||||||
|
|
||||||
|
/\o{40000000000}/
|
||||||
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\x{7fffffff}\x{7fffffff}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
First code unit = \x{7fffffff}
|
||||||
|
Last code unit = \x{7fffffff}
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
/\x{80000000}\x{80000000}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
First code unit = \x{80000000}
|
||||||
|
Last code unit = \x{80000000}
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
/\x{ffffffff}\x{ffffffff}/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
First code unit = \x{ffffffff}
|
||||||
|
Last code unit = \x{ffffffff}
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
# Non-UTF characters
|
||||||
|
|
||||||
|
/\C{2,3}/
|
||||||
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
||||||
|
0: \x{400000}\x{400001}\x{400002}
|
||||||
|
|
||||||
|
/\x{400000}\x{800000}/IBi
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
/i \x{400000}\x{800000}
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: caseless
|
||||||
|
First code unit = \x{400000}
|
||||||
|
Last code unit = \x{800000}
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
# Check character ranges
|
||||||
|
|
||||||
|
/[\H]/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
|
||||||
|
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
|
||||||
|
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
|
||||||
|
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
|
||||||
|
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
|
||||||
|
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
|
||||||
|
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
|
||||||
|
\x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
|
||||||
|
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
|
||||||
|
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
|
||||||
|
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
|
||||||
|
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
|
||||||
|
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
|
||||||
|
\xfa \xfb \xfc \xfd \xfe \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/[\V]/IB
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
|
||||||
|
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
||||||
|
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
|
||||||
|
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
|
||||||
|
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
|
||||||
|
\x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
|
||||||
|
\x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
|
||||||
|
\xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
|
||||||
|
\xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
|
||||||
|
\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
|
||||||
|
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
|
||||||
|
\xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
|
||||||
|
\xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
|
||||||
|
\xfc \xfd \xfe \xff
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
# End of testinput11
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,27 @@
|
||||||
|
# These DFA tests are for the handling of characters greater than 255 in
|
||||||
|
# 16-bit or 32-bit, non-UTF mode.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
#subject dfa
|
||||||
|
|
||||||
|
/^\x{ffff}+/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}?/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}*/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{3}/i
|
||||||
|
\x{ffff}\x{ffff}\x{ffff}
|
||||||
|
0: \x{ffff}\x{ffff}\x{ffff}
|
||||||
|
|
||||||
|
/^\x{ffff}{0,3}/i
|
||||||
|
\x{ffff}
|
||||||
|
0: \x{ffff}
|
||||||
|
|
||||||
|
# End of testinput13
|
|
@ -6723,7 +6723,7 @@ Subject length lower bound = 5
|
||||||
1: \x0d
|
1: \x0d
|
||||||
2: \x0a
|
2: \x0a
|
||||||
|
|
||||||
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
|
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
May match empty string
|
May match empty string
|
||||||
No options
|
No options
|
||||||
|
|
|
@ -1281,13 +1281,13 @@ Partial match: abcde
|
||||||
X\=ps
|
X\=ps
|
||||||
Partial match: X
|
Partial match: X
|
||||||
|
|
||||||
/\sxxx\s/utf,tables=1
|
/\sxxx\s/utf,tables=2
|
||||||
AB\x{85}xxx\x{a0}XYZ
|
AB\x{85}xxx\x{a0}XYZ
|
||||||
No match
|
0: \x{85}xxx\x{a0}
|
||||||
AB\x{a0}xxx\x{85}XYZ
|
AB\x{a0}xxx\x{85}XYZ
|
||||||
No match
|
0: \x{a0}xxx\x{85}
|
||||||
|
|
||||||
/\S \S/utf,tables=1
|
/\S \S/utf,tables=2
|
||||||
\x{a2} \x{84}
|
\x{a2} \x{84}
|
||||||
0: \x{a2} \x{84}
|
0: \x{a2} \x{84}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,745 @@
|
||||||
|
# These are a few representative patterns whose lengths and offsets are to be
|
||||||
|
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||||
|
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||||
|
# are all themselves checked in other tests. Unicode, including property
|
||||||
|
# support, is required for these tests.
|
||||||
|
|
||||||
|
#pattern fullbincode,memory
|
||||||
|
|
||||||
|
/((?i)b)/
|
||||||
|
Memory allocation (code space): 24
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 5 CBra 1
|
||||||
|
5 /i b
|
||||||
|
7 5 Ket
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?s)(.*X|^B)/
|
||||||
|
Memory allocation (code space): 38
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 16 Bra
|
||||||
|
2 7 CBra 1
|
||||||
|
5 AllAny*
|
||||||
|
7 X
|
||||||
|
9 5 Alt
|
||||||
|
11 ^
|
||||||
|
12 B
|
||||||
|
14 12 Ket
|
||||||
|
16 16 Ket
|
||||||
|
18 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?s:.*X|^B)/
|
||||||
|
Memory allocation (code space): 36
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 15 Bra
|
||||||
|
2 6 Bra
|
||||||
|
4 AllAny*
|
||||||
|
6 X
|
||||||
|
8 5 Alt
|
||||||
|
10 ^
|
||||||
|
11 B
|
||||||
|
13 11 Ket
|
||||||
|
15 15 Ket
|
||||||
|
17 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[[:alnum:]]/
|
||||||
|
Memory allocation (code space): 46
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 20 Bra
|
||||||
|
2 ^
|
||||||
|
3 [0-9A-Za-z]
|
||||||
|
20 20 Ket
|
||||||
|
22 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/#/Ix
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 2 Bra
|
||||||
|
2 2 Ket
|
||||||
|
4 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
May match empty string
|
||||||
|
Options: extended
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 0
|
||||||
|
|
||||||
|
/a#/Ix
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 a
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: extended
|
||||||
|
First code unit = 'a'
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/x?+/
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 x?+
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/x++/
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 x++
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/x{1,3}+/
|
||||||
|
Memory allocation (code space): 20
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 7 Bra
|
||||||
|
2 x
|
||||||
|
4 x{0,2}+
|
||||||
|
7 7 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(x)*+/
|
||||||
|
Memory allocation (code space): 26
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 Braposzero
|
||||||
|
3 5 CBraPos 1
|
||||||
|
6 x
|
||||||
|
8 5 KetRpos
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||||
|
Memory allocation (code space): 142
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 68 Bra
|
||||||
|
2 ^
|
||||||
|
3 63 CBra 1
|
||||||
|
6 5 CBra 2
|
||||||
|
9 a+
|
||||||
|
11 5 Ket
|
||||||
|
13 21 CBra 3
|
||||||
|
16 [ab]+?
|
||||||
|
34 21 Ket
|
||||||
|
36 21 CBra 4
|
||||||
|
39 [bc]+
|
||||||
|
57 21 Ket
|
||||||
|
59 5 CBra 5
|
||||||
|
62 \w*+
|
||||||
|
64 5 Ket
|
||||||
|
66 63 Ket
|
||||||
|
68 68 Ket
|
||||||
|
70 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
Memory allocation (code space): 1648
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 821 Bra
|
||||||
|
2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||||
|
820 \b
|
||||||
|
821 821 Ket
|
||||||
|
823 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
Memory allocation (code space): 1628
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 811 Bra
|
||||||
|
2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||||
|
810 \b
|
||||||
|
811 811 Ket
|
||||||
|
813 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(a(?1)b)/
|
||||||
|
Memory allocation (code space): 32
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
2 9 CBra 1
|
||||||
|
5 a
|
||||||
|
7 2 Recurse
|
||||||
|
9 b
|
||||||
|
11 9 Ket
|
||||||
|
13 13 Ket
|
||||||
|
15 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(a(?1)+b)/
|
||||||
|
Memory allocation (code space): 40
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 17 Bra
|
||||||
|
2 13 CBra 1
|
||||||
|
5 a
|
||||||
|
7 4 Once
|
||||||
|
9 2 Recurse
|
||||||
|
11 4 KetRmax
|
||||||
|
13 b
|
||||||
|
15 13 Ket
|
||||||
|
17 17 Ket
|
||||||
|
19 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||||
|
Memory allocation (code space): 54
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 24 Bra
|
||||||
|
2 a
|
||||||
|
4 5 CBra 1
|
||||||
|
7 b
|
||||||
|
9 4 Alt
|
||||||
|
11 c
|
||||||
|
13 9 Ket
|
||||||
|
15 d
|
||||||
|
17 5 CBra 2
|
||||||
|
20 e
|
||||||
|
22 5 Ket
|
||||||
|
24 24 Ket
|
||||||
|
26 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||||
|
Memory allocation (code space): 64
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 29 Bra
|
||||||
|
2 18 Bra
|
||||||
|
4 a
|
||||||
|
6 12 CBra 1
|
||||||
|
9 c
|
||||||
|
11 5 CBra 2
|
||||||
|
14 d
|
||||||
|
16 5 Ket
|
||||||
|
18 12 Ket
|
||||||
|
20 18 Ket
|
||||||
|
22 5 CBra 3
|
||||||
|
25 a
|
||||||
|
27 5 Ket
|
||||||
|
29 29 Ket
|
||||||
|
31 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||||
|
Memory allocation (code space): 54
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 24 Bra
|
||||||
|
2 5 CBra 1
|
||||||
|
5 a
|
||||||
|
7 5 Ket
|
||||||
|
9 Any
|
||||||
|
10 Any
|
||||||
|
11 Any
|
||||||
|
12 \1
|
||||||
|
14 bbb
|
||||||
|
20 2 Recurse
|
||||||
|
22 d
|
||||||
|
24 24 Ket
|
||||||
|
26 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abc(?C255)de(?C)f/
|
||||||
|
Memory allocation (code space): 50
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 22 Bra
|
||||||
|
2 abc
|
||||||
|
8 Callout 255 10 1
|
||||||
|
12 de
|
||||||
|
16 Callout 0 16 1
|
||||||
|
20 f
|
||||||
|
22 22 Ket
|
||||||
|
24 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abcde/auto_callout
|
||||||
|
Memory allocation (code space): 78
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 36 Bra
|
||||||
|
2 Callout 255 0 1
|
||||||
|
6 a
|
||||||
|
8 Callout 255 1 1
|
||||||
|
12 b
|
||||||
|
14 Callout 255 2 1
|
||||||
|
18 c
|
||||||
|
20 Callout 255 3 1
|
||||||
|
24 d
|
||||||
|
26 Callout 255 4 1
|
||||||
|
30 e
|
||||||
|
32 Callout 255 5 0
|
||||||
|
36 36 Ket
|
||||||
|
38 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{100}/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{100}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{1000}/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{1000}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{10000}/utf
|
||||||
|
Memory allocation (code space): 16
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
2 \x{10000}
|
||||||
|
5 5 Ket
|
||||||
|
7 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{100000}/utf
|
||||||
|
Memory allocation (code space): 16
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
2 \x{100000}
|
||||||
|
5 5 Ket
|
||||||
|
7 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{10ffff}/utf
|
||||||
|
Memory allocation (code space): 16
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
2 \x{10ffff}
|
||||||
|
5 5 Ket
|
||||||
|
7 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{110000}/utf
|
||||||
|
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/[\x{ff}]/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{ff}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{100}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x80/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{80}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\xff/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{ff}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||||
|
Memory allocation (code space): 26
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 A\x{2262}\x{391}.
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = 'A'
|
||||||
|
Last code unit = '.'
|
||||||
|
Subject length lower bound = 4
|
||||||
|
|
||||||
|
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||||
|
Memory allocation (code space): 22
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 8 Bra
|
||||||
|
2 \x{d55c}\x{ad6d}\x{c5b4}
|
||||||
|
8 8 Ket
|
||||||
|
10 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = \x{d55c}
|
||||||
|
Last code unit = \x{c5b4}
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||||
|
Memory allocation (code space): 22
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 8 Bra
|
||||||
|
2 \x{65e5}\x{672c}\x{8a9e}
|
||||||
|
8 8 Ket
|
||||||
|
10 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = \x{65e5}
|
||||||
|
Last code unit = \x{8a9e}
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{100}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[Z\x{100}]/utf
|
||||||
|
Memory allocation (code space): 54
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 24 Bra
|
||||||
|
2 [Z\x{100}]
|
||||||
|
24 24 Ket
|
||||||
|
26 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||||
|
Memory allocation (code space): 26
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 ^
|
||||||
|
3 [\x{100}-\x{150}]
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E]/utf
|
||||||
|
Memory allocation (code space): 26
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 ^
|
||||||
|
3 [\x{100}-\x{150}]
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E/utf
|
||||||
|
Failed: error 106 at offset 13: missing terminating ] for character class
|
||||||
|
|
||||||
|
/[\p{L}]/
|
||||||
|
Memory allocation (code space): 24
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\p{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{^L}]/
|
||||||
|
Memory allocation (code space): 24
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\P{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\P{L}]/
|
||||||
|
Memory allocation (code space): 24
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\P{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\P{^L}]/
|
||||||
|
Memory allocation (code space): 24
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\p{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[abc\p{L}\x{0660}]/utf
|
||||||
|
Memory allocation (code space): 60
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 27 Bra
|
||||||
|
2 [a-c\p{L}\x{660}]
|
||||||
|
27 27 Ket
|
||||||
|
29 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{Nd}]/utf
|
||||||
|
Memory allocation (code space): 24
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\p{Nd}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{Nd}+-]+/utf
|
||||||
|
Memory allocation (code space): 58
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 26 Bra
|
||||||
|
2 [+\-\p{Nd}]++
|
||||||
|
26 26 Ket
|
||||||
|
28 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||||
|
Memory allocation (code space): 32
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||||
|
13 13 Ket
|
||||||
|
15 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||||
|
Memory allocation (code space): 32
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
2 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||||
|
13 13 Ket
|
||||||
|
15 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\x{105}-\x{109}]/i,utf
|
||||||
|
Memory allocation (code space): 24
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\x{104}-\x{109}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/( ( (?(1)0|) )* )/x
|
||||||
|
Memory allocation (code space): 52
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 23 Bra
|
||||||
|
2 19 CBra 1
|
||||||
|
5 Brazero
|
||||||
|
6 13 SCBra 2
|
||||||
|
9 6 Cond
|
||||||
|
11 1 Cond ref
|
||||||
|
13 0
|
||||||
|
15 2 Alt
|
||||||
|
17 8 Ket
|
||||||
|
19 13 KetRmax
|
||||||
|
21 19 Ket
|
||||||
|
23 23 Ket
|
||||||
|
25 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/( (?(1)0|)* )/x
|
||||||
|
Memory allocation (code space): 42
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 18 Bra
|
||||||
|
2 14 CBra 1
|
||||||
|
5 Brazero
|
||||||
|
6 6 SCond
|
||||||
|
8 1 Cond ref
|
||||||
|
10 0
|
||||||
|
12 2 Alt
|
||||||
|
14 8 KetRmax
|
||||||
|
16 14 Ket
|
||||||
|
18 18 Ket
|
||||||
|
20 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[a]/
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 a
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[a]/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 a
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\xaa]/
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{aa}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\xaa]/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{aa}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^a]/
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^a]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^a]/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^a]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\xaa]/
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^\x{aa}]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\xaa]/utf
|
||||||
|
Memory allocation (code space): 14
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^\x{aa}]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
#pattern -memory
|
||||||
|
|
||||||
|
/[^\d]/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [^\p{Nd}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 26 Bra
|
||||||
|
2 [ -~\x80-\xff\P{L}]++
|
||||||
|
26 26 Ket
|
||||||
|
28 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 26 Bra
|
||||||
|
2 [ -~\x80-\xff\P{L}]++
|
||||||
|
26 26 Ket
|
||||||
|
28 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:alpha:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 [\p{L}]++
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^alpha:]\S]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
2 [\P{L}\P{Xsp}]++
|
||||||
|
13 13 Ket
|
||||||
|
15 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 60 Bra
|
||||||
|
2 abc
|
||||||
|
8 5 CBra 1
|
||||||
|
11 d
|
||||||
|
13 4 Alt
|
||||||
|
15 e
|
||||||
|
17 9 Ket
|
||||||
|
19 *THEN
|
||||||
|
20 x
|
||||||
|
22 12 CBra 2
|
||||||
|
25 123
|
||||||
|
31 *THEN
|
||||||
|
32 4
|
||||||
|
34 24 Alt
|
||||||
|
36 567
|
||||||
|
42 5 CBra 3
|
||||||
|
45 b
|
||||||
|
47 4 Alt
|
||||||
|
49 q
|
||||||
|
51 9 Ket
|
||||||
|
53 *THEN
|
||||||
|
54 xx
|
||||||
|
58 36 Ket
|
||||||
|
60 60 Ket
|
||||||
|
62 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(((a\2)|(a*)\g<-1>))*a?/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 39 Bra
|
||||||
|
2 Brazero
|
||||||
|
3 32 SCBra 1
|
||||||
|
6 27 Once
|
||||||
|
8 12 CBra 2
|
||||||
|
11 7 CBra 3
|
||||||
|
14 a
|
||||||
|
16 \2
|
||||||
|
18 7 Ket
|
||||||
|
20 11 Alt
|
||||||
|
22 5 CBra 4
|
||||||
|
25 a*
|
||||||
|
27 5 Ket
|
||||||
|
29 22 Recurse
|
||||||
|
31 23 Ket
|
||||||
|
33 27 Ket
|
||||||
|
35 32 KetRmax
|
||||||
|
37 a?+
|
||||||
|
39 39 Ket
|
||||||
|
41 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# End of testinput8
|
|
@ -0,0 +1,745 @@
|
||||||
|
# These are a few representative patterns whose lengths and offsets are to be
|
||||||
|
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||||
|
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||||
|
# are all themselves checked in other tests. Unicode, including property
|
||||||
|
# support, is required for these tests.
|
||||||
|
|
||||||
|
#pattern fullbincode,memory
|
||||||
|
|
||||||
|
/((?i)b)/
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 5 CBra 1
|
||||||
|
5 /i b
|
||||||
|
7 5 Ket
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?s)(.*X|^B)/
|
||||||
|
Memory allocation (code space): 76
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 16 Bra
|
||||||
|
2 7 CBra 1
|
||||||
|
5 AllAny*
|
||||||
|
7 X
|
||||||
|
9 5 Alt
|
||||||
|
11 ^
|
||||||
|
12 B
|
||||||
|
14 12 Ket
|
||||||
|
16 16 Ket
|
||||||
|
18 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?s:.*X|^B)/
|
||||||
|
Memory allocation (code space): 72
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 15 Bra
|
||||||
|
2 6 Bra
|
||||||
|
4 AllAny*
|
||||||
|
6 X
|
||||||
|
8 5 Alt
|
||||||
|
10 ^
|
||||||
|
11 B
|
||||||
|
13 11 Ket
|
||||||
|
15 15 Ket
|
||||||
|
17 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[[:alnum:]]/
|
||||||
|
Memory allocation (code space): 60
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 12 Bra
|
||||||
|
2 ^
|
||||||
|
3 [0-9A-Za-z]
|
||||||
|
12 12 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/#/Ix
|
||||||
|
Memory allocation (code space): 20
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 2 Bra
|
||||||
|
2 2 Ket
|
||||||
|
4 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
May match empty string
|
||||||
|
Options: extended
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 0
|
||||||
|
|
||||||
|
/a#/Ix
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 a
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: extended
|
||||||
|
First code unit = 'a'
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/x?+/
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 x?+
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/x++/
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 x++
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/x{1,3}+/
|
||||||
|
Memory allocation (code space): 40
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 7 Bra
|
||||||
|
2 x
|
||||||
|
4 x{0,2}+
|
||||||
|
7 7 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(x)*+/
|
||||||
|
Memory allocation (code space): 52
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 Braposzero
|
||||||
|
3 5 CBraPos 1
|
||||||
|
6 x
|
||||||
|
8 5 KetRpos
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||||
|
Memory allocation (code space): 220
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 52 Bra
|
||||||
|
2 ^
|
||||||
|
3 47 CBra 1
|
||||||
|
6 5 CBra 2
|
||||||
|
9 a+
|
||||||
|
11 5 Ket
|
||||||
|
13 13 CBra 3
|
||||||
|
16 [ab]+?
|
||||||
|
26 13 Ket
|
||||||
|
28 13 CBra 4
|
||||||
|
31 [bc]+
|
||||||
|
41 13 Ket
|
||||||
|
43 5 CBra 5
|
||||||
|
46 \w*+
|
||||||
|
48 5 Ket
|
||||||
|
50 47 Ket
|
||||||
|
52 52 Ket
|
||||||
|
54 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
Memory allocation (code space): 3296
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 821 Bra
|
||||||
|
2 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||||
|
820 \b
|
||||||
|
821 821 Ket
|
||||||
|
823 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
Memory allocation (code space): 3256
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 811 Bra
|
||||||
|
2 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||||
|
810 \b
|
||||||
|
811 811 Ket
|
||||||
|
813 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(a(?1)b)/
|
||||||
|
Memory allocation (code space): 64
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
2 9 CBra 1
|
||||||
|
5 a
|
||||||
|
7 2 Recurse
|
||||||
|
9 b
|
||||||
|
11 9 Ket
|
||||||
|
13 13 Ket
|
||||||
|
15 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(a(?1)+b)/
|
||||||
|
Memory allocation (code space): 80
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 17 Bra
|
||||||
|
2 13 CBra 1
|
||||||
|
5 a
|
||||||
|
7 4 Once
|
||||||
|
9 2 Recurse
|
||||||
|
11 4 KetRmax
|
||||||
|
13 b
|
||||||
|
15 13 Ket
|
||||||
|
17 17 Ket
|
||||||
|
19 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||||
|
Memory allocation (code space): 108
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 24 Bra
|
||||||
|
2 a
|
||||||
|
4 5 CBra 1
|
||||||
|
7 b
|
||||||
|
9 4 Alt
|
||||||
|
11 c
|
||||||
|
13 9 Ket
|
||||||
|
15 d
|
||||||
|
17 5 CBra 2
|
||||||
|
20 e
|
||||||
|
22 5 Ket
|
||||||
|
24 24 Ket
|
||||||
|
26 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||||
|
Memory allocation (code space): 128
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 29 Bra
|
||||||
|
2 18 Bra
|
||||||
|
4 a
|
||||||
|
6 12 CBra 1
|
||||||
|
9 c
|
||||||
|
11 5 CBra 2
|
||||||
|
14 d
|
||||||
|
16 5 Ket
|
||||||
|
18 12 Ket
|
||||||
|
20 18 Ket
|
||||||
|
22 5 CBra 3
|
||||||
|
25 a
|
||||||
|
27 5 Ket
|
||||||
|
29 29 Ket
|
||||||
|
31 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||||
|
Memory allocation (code space): 108
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 24 Bra
|
||||||
|
2 5 CBra 1
|
||||||
|
5 a
|
||||||
|
7 5 Ket
|
||||||
|
9 Any
|
||||||
|
10 Any
|
||||||
|
11 Any
|
||||||
|
12 \1
|
||||||
|
14 bbb
|
||||||
|
20 2 Recurse
|
||||||
|
22 d
|
||||||
|
24 24 Ket
|
||||||
|
26 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abc(?C255)de(?C)f/
|
||||||
|
Memory allocation (code space): 100
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 22 Bra
|
||||||
|
2 abc
|
||||||
|
8 Callout 255 10 1
|
||||||
|
12 de
|
||||||
|
16 Callout 0 16 1
|
||||||
|
20 f
|
||||||
|
22 22 Ket
|
||||||
|
24 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abcde/auto_callout
|
||||||
|
Memory allocation (code space): 156
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 36 Bra
|
||||||
|
2 Callout 255 0 1
|
||||||
|
6 a
|
||||||
|
8 Callout 255 1 1
|
||||||
|
12 b
|
||||||
|
14 Callout 255 2 1
|
||||||
|
18 c
|
||||||
|
20 Callout 255 3 1
|
||||||
|
24 d
|
||||||
|
26 Callout 255 4 1
|
||||||
|
30 e
|
||||||
|
32 Callout 255 5 0
|
||||||
|
36 36 Ket
|
||||||
|
38 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{100}/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{100}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{1000}/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{1000}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{10000}/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{10000}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{100000}/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{100000}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{10ffff}/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{10ffff}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{110000}/utf
|
||||||
|
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/[\x{ff}]/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{ff}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{100}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x80/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{80}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\xff/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{ff}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||||
|
Memory allocation (code space): 52
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 A\x{2262}\x{391}.
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = 'A'
|
||||||
|
Last code unit = '.'
|
||||||
|
Subject length lower bound = 4
|
||||||
|
|
||||||
|
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||||
|
Memory allocation (code space): 44
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 8 Bra
|
||||||
|
2 \x{d55c}\x{ad6d}\x{c5b4}
|
||||||
|
8 8 Ket
|
||||||
|
10 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = \x{d55c}
|
||||||
|
Last code unit = \x{c5b4}
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||||
|
Memory allocation (code space): 44
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 8 Bra
|
||||||
|
2 \x{65e5}\x{672c}\x{8a9e}
|
||||||
|
8 8 Ket
|
||||||
|
10 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = \x{65e5}
|
||||||
|
Last code unit = \x{8a9e}
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{100}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[Z\x{100}]/utf
|
||||||
|
Memory allocation (code space): 76
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 16 Bra
|
||||||
|
2 [Z\x{100}]
|
||||||
|
16 16 Ket
|
||||||
|
18 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||||
|
Memory allocation (code space): 52
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 ^
|
||||||
|
3 [\x{100}-\x{150}]
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E]/utf
|
||||||
|
Memory allocation (code space): 52
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 ^
|
||||||
|
3 [\x{100}-\x{150}]
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E/utf
|
||||||
|
Failed: error 106 at offset 13: missing terminating ] for character class
|
||||||
|
|
||||||
|
/[\p{L}]/
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\p{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{^L}]/
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\P{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\P{L}]/
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\P{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\P{^L}]/
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\p{L}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[abc\p{L}\x{0660}]/utf
|
||||||
|
Memory allocation (code space): 88
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 19 Bra
|
||||||
|
2 [a-c\p{L}\x{660}]
|
||||||
|
19 19 Ket
|
||||||
|
21 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{Nd}]/utf
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\p{Nd}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{Nd}+-]+/utf
|
||||||
|
Memory allocation (code space): 84
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 18 Bra
|
||||||
|
2 [+\-\p{Nd}]++
|
||||||
|
18 18 Ket
|
||||||
|
20 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||||
|
Memory allocation (code space): 60
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 12 Bra
|
||||||
|
2 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||||
|
12 12 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||||
|
Memory allocation (code space): 60
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 12 Bra
|
||||||
|
2 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||||
|
12 12 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\x{105}-\x{109}]/i,utf
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [\x{104}-\x{109}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/( ( (?(1)0|) )* )/x
|
||||||
|
Memory allocation (code space): 104
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 23 Bra
|
||||||
|
2 19 CBra 1
|
||||||
|
5 Brazero
|
||||||
|
6 13 SCBra 2
|
||||||
|
9 6 Cond
|
||||||
|
11 1 Cond ref
|
||||||
|
13 0
|
||||||
|
15 2 Alt
|
||||||
|
17 8 Ket
|
||||||
|
19 13 KetRmax
|
||||||
|
21 19 Ket
|
||||||
|
23 23 Ket
|
||||||
|
25 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/( (?(1)0|)* )/x
|
||||||
|
Memory allocation (code space): 84
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 18 Bra
|
||||||
|
2 14 CBra 1
|
||||||
|
5 Brazero
|
||||||
|
6 6 SCond
|
||||||
|
8 1 Cond ref
|
||||||
|
10 0
|
||||||
|
12 2 Alt
|
||||||
|
14 8 KetRmax
|
||||||
|
16 14 Ket
|
||||||
|
18 18 Ket
|
||||||
|
20 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[a]/
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 a
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[a]/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 a
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\xaa]/
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{aa}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\xaa]/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 \x{aa}
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^a]/
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^a]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^a]/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^a]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\xaa]/
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^\x{aa}]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\xaa]/utf
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 4 Bra
|
||||||
|
2 [^\x{aa}]
|
||||||
|
4 4 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
#pattern -memory
|
||||||
|
|
||||||
|
/[^\d]/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
2 [^\p{Nd}]
|
||||||
|
9 9 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 18 Bra
|
||||||
|
2 [ -~\x80-\xff\P{L}]++
|
||||||
|
18 18 Ket
|
||||||
|
20 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 18 Bra
|
||||||
|
2 [ -~\x80-\xff\P{L}]++
|
||||||
|
18 18 Ket
|
||||||
|
20 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:alpha:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 10 Bra
|
||||||
|
2 [\p{L}]++
|
||||||
|
10 10 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^alpha:]\S]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
2 [\P{L}\P{Xsp}]++
|
||||||
|
13 13 Ket
|
||||||
|
15 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 60 Bra
|
||||||
|
2 abc
|
||||||
|
8 5 CBra 1
|
||||||
|
11 d
|
||||||
|
13 4 Alt
|
||||||
|
15 e
|
||||||
|
17 9 Ket
|
||||||
|
19 *THEN
|
||||||
|
20 x
|
||||||
|
22 12 CBra 2
|
||||||
|
25 123
|
||||||
|
31 *THEN
|
||||||
|
32 4
|
||||||
|
34 24 Alt
|
||||||
|
36 567
|
||||||
|
42 5 CBra 3
|
||||||
|
45 b
|
||||||
|
47 4 Alt
|
||||||
|
49 q
|
||||||
|
51 9 Ket
|
||||||
|
53 *THEN
|
||||||
|
54 xx
|
||||||
|
58 36 Ket
|
||||||
|
60 60 Ket
|
||||||
|
62 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(((a\2)|(a*)\g<-1>))*a?/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 39 Bra
|
||||||
|
2 Brazero
|
||||||
|
3 32 SCBra 1
|
||||||
|
6 27 Once
|
||||||
|
8 12 CBra 2
|
||||||
|
11 7 CBra 3
|
||||||
|
14 a
|
||||||
|
16 \2
|
||||||
|
18 7 Ket
|
||||||
|
20 11 Alt
|
||||||
|
22 5 CBra 4
|
||||||
|
25 a*
|
||||||
|
27 5 Ket
|
||||||
|
29 22 Recurse
|
||||||
|
31 23 Ket
|
||||||
|
33 27 Ket
|
||||||
|
35 32 KetRmax
|
||||||
|
37 a?+
|
||||||
|
39 39 Ket
|
||||||
|
41 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# End of testinput8
|
|
@ -0,0 +1,745 @@
|
||||||
|
# These are a few representative patterns whose lengths and offsets are to be
|
||||||
|
# shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||||
|
# sizes don't go horribly wrong when something is changed. The pattern contents
|
||||||
|
# are all themselves checked in other tests. Unicode, including property
|
||||||
|
# support, is required for these tests.
|
||||||
|
|
||||||
|
#pattern fullbincode,memory
|
||||||
|
|
||||||
|
/((?i)b)/
|
||||||
|
Memory allocation (code space): 17
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
3 7 CBra 1
|
||||||
|
8 /i b
|
||||||
|
10 7 Ket
|
||||||
|
13 13 Ket
|
||||||
|
16 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?s)(.*X|^B)/
|
||||||
|
Memory allocation (code space): 25
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 21 Bra
|
||||||
|
3 9 CBra 1
|
||||||
|
8 AllAny*
|
||||||
|
10 X
|
||||||
|
12 6 Alt
|
||||||
|
15 ^
|
||||||
|
16 B
|
||||||
|
18 15 Ket
|
||||||
|
21 21 Ket
|
||||||
|
24 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?s:.*X|^B)/
|
||||||
|
Memory allocation (code space): 23
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 19 Bra
|
||||||
|
3 7 Bra
|
||||||
|
6 AllAny*
|
||||||
|
8 X
|
||||||
|
10 6 Alt
|
||||||
|
13 ^
|
||||||
|
14 B
|
||||||
|
16 13 Ket
|
||||||
|
19 19 Ket
|
||||||
|
22 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[[:alnum:]]/
|
||||||
|
Memory allocation (code space): 41
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 37 Bra
|
||||||
|
3 ^
|
||||||
|
4 [0-9A-Za-z]
|
||||||
|
37 37 Ket
|
||||||
|
40 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/#/Ix
|
||||||
|
Memory allocation (code space): 7
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 3 Bra
|
||||||
|
3 3 Ket
|
||||||
|
6 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
May match empty string
|
||||||
|
Options: extended
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 0
|
||||||
|
|
||||||
|
/a#/Ix
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 a
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: extended
|
||||||
|
First code unit = 'a'
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/x?+/
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 x?+
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/x++/
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 x++
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/x{1,3}+/
|
||||||
|
Memory allocation (code space): 13
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 9 Bra
|
||||||
|
3 x
|
||||||
|
5 x{0,2}+
|
||||||
|
9 9 Ket
|
||||||
|
12 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(x)*+/
|
||||||
|
Memory allocation (code space): 18
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 14 Bra
|
||||||
|
3 Braposzero
|
||||||
|
4 7 CBraPos 1
|
||||||
|
9 x
|
||||||
|
11 7 KetRpos
|
||||||
|
14 14 Ket
|
||||||
|
17 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||||
|
Memory allocation (code space): 120
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 116 Bra
|
||||||
|
3 ^
|
||||||
|
4 109 CBra 1
|
||||||
|
9 7 CBra 2
|
||||||
|
14 a+
|
||||||
|
16 7 Ket
|
||||||
|
19 39 CBra 3
|
||||||
|
24 [ab]+?
|
||||||
|
58 39 Ket
|
||||||
|
61 39 CBra 4
|
||||||
|
66 [bc]+
|
||||||
|
100 39 Ket
|
||||||
|
103 7 CBra 5
|
||||||
|
108 \w*+
|
||||||
|
110 7 Ket
|
||||||
|
113 109 Ket
|
||||||
|
116 116 Ket
|
||||||
|
119 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
Memory allocation (code space): 826
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 822 Bra
|
||||||
|
3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||||
|
821 \b
|
||||||
|
822 822 Ket
|
||||||
|
825 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
|
||||||
|
Memory allocation (code space): 816
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 812 Bra
|
||||||
|
3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||||
|
811 \b
|
||||||
|
812 812 Ket
|
||||||
|
815 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(a(?1)b)/
|
||||||
|
Memory allocation (code space): 22
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 18 Bra
|
||||||
|
3 12 CBra 1
|
||||||
|
8 a
|
||||||
|
10 3 Recurse
|
||||||
|
13 b
|
||||||
|
15 12 Ket
|
||||||
|
18 18 Ket
|
||||||
|
21 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(a(?1)+b)/
|
||||||
|
Memory allocation (code space): 28
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 24 Bra
|
||||||
|
3 18 CBra 1
|
||||||
|
8 a
|
||||||
|
10 6 Once
|
||||||
|
13 3 Recurse
|
||||||
|
16 6 KetRmax
|
||||||
|
19 b
|
||||||
|
21 18 Ket
|
||||||
|
24 24 Ket
|
||||||
|
27 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/a(?P<name1>b|c)d(?P<longername2>e)/
|
||||||
|
Memory allocation (code space): 36
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 32 Bra
|
||||||
|
3 a
|
||||||
|
5 7 CBra 1
|
||||||
|
10 b
|
||||||
|
12 5 Alt
|
||||||
|
15 c
|
||||||
|
17 12 Ket
|
||||||
|
20 d
|
||||||
|
22 7 CBra 2
|
||||||
|
27 e
|
||||||
|
29 7 Ket
|
||||||
|
32 32 Ket
|
||||||
|
35 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
|
||||||
|
Memory allocation (code space): 45
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 41 Bra
|
||||||
|
3 25 Bra
|
||||||
|
6 a
|
||||||
|
8 17 CBra 1
|
||||||
|
13 c
|
||||||
|
15 7 CBra 2
|
||||||
|
20 d
|
||||||
|
22 7 Ket
|
||||||
|
25 17 Ket
|
||||||
|
28 25 Ket
|
||||||
|
31 7 CBra 3
|
||||||
|
36 a
|
||||||
|
38 7 Ket
|
||||||
|
41 41 Ket
|
||||||
|
44 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?P<a>a)...(?P=a)bbb(?P>a)d/
|
||||||
|
Memory allocation (code space): 34
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 30 Bra
|
||||||
|
3 7 CBra 1
|
||||||
|
8 a
|
||||||
|
10 7 Ket
|
||||||
|
13 Any
|
||||||
|
14 Any
|
||||||
|
15 Any
|
||||||
|
16 \1
|
||||||
|
19 bbb
|
||||||
|
25 3 Recurse
|
||||||
|
28 d
|
||||||
|
30 30 Ket
|
||||||
|
33 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abc(?C255)de(?C)f/
|
||||||
|
Memory allocation (code space): 31
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 27 Bra
|
||||||
|
3 abc
|
||||||
|
9 Callout 255 10 1
|
||||||
|
15 de
|
||||||
|
19 Callout 0 16 1
|
||||||
|
25 f
|
||||||
|
27 27 Ket
|
||||||
|
30 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abcde/auto_callout
|
||||||
|
Memory allocation (code space): 53
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 49 Bra
|
||||||
|
3 Callout 255 0 1
|
||||||
|
9 a
|
||||||
|
11 Callout 255 1 1
|
||||||
|
17 b
|
||||||
|
19 Callout 255 2 1
|
||||||
|
25 c
|
||||||
|
27 Callout 255 3 1
|
||||||
|
33 d
|
||||||
|
35 Callout 255 4 1
|
||||||
|
41 e
|
||||||
|
43 Callout 255 5 0
|
||||||
|
49 49 Ket
|
||||||
|
52 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{100}/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 \x{100}
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{1000}/utf
|
||||||
|
Memory allocation (code space): 11
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 7 Bra
|
||||||
|
3 \x{1000}
|
||||||
|
7 7 Ket
|
||||||
|
10 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{10000}/utf
|
||||||
|
Memory allocation (code space): 12
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 8 Bra
|
||||||
|
3 \x{10000}
|
||||||
|
8 8 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{100000}/utf
|
||||||
|
Memory allocation (code space): 12
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 8 Bra
|
||||||
|
3 \x{100000}
|
||||||
|
8 8 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{10ffff}/utf
|
||||||
|
Memory allocation (code space): 12
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 8 Bra
|
||||||
|
3 \x{10ffff}
|
||||||
|
8 8 Ket
|
||||||
|
11 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{110000}/utf
|
||||||
|
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/[\x{ff}]/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 \x{ff}
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 \x{100}
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x80/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 \x{80}
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\xff/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 \x{ff}
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
|
||||||
|
Memory allocation (code space): 18
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 14 Bra
|
||||||
|
3 A\x{2262}\x{391}.
|
||||||
|
14 14 Ket
|
||||||
|
17 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = 'A'
|
||||||
|
Last code unit = '.'
|
||||||
|
Subject length lower bound = 4
|
||||||
|
|
||||||
|
/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
|
||||||
|
Memory allocation (code space): 19
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 15 Bra
|
||||||
|
3 \x{d55c}\x{ad6d}\x{c5b4}
|
||||||
|
15 15 Ket
|
||||||
|
18 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = \xed
|
||||||
|
Last code unit = \xb4
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/\x{65e5}\x{672c}\x{8a9e}/I,utf
|
||||||
|
Memory allocation (code space): 19
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 15 Bra
|
||||||
|
3 \x{65e5}\x{672c}\x{8a9e}
|
||||||
|
15 15 Ket
|
||||||
|
18 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Options: utf
|
||||||
|
First code unit = \xe6
|
||||||
|
Last code unit = \x9e
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/[\x{100}]/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 \x{100}
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[Z\x{100}]/utf
|
||||||
|
Memory allocation (code space): 47
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 43 Bra
|
||||||
|
3 [Z\x{100}]
|
||||||
|
43 43 Ket
|
||||||
|
46 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\x{100}\E-\Q\E\x{150}]/utf
|
||||||
|
Memory allocation (code space): 18
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 14 Bra
|
||||||
|
3 ^
|
||||||
|
4 [\x{100}-\x{150}]
|
||||||
|
14 14 Ket
|
||||||
|
17 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E]/utf
|
||||||
|
Memory allocation (code space): 18
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 14 Bra
|
||||||
|
3 ^
|
||||||
|
4 [\x{100}-\x{150}]
|
||||||
|
14 14 Ket
|
||||||
|
17 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^[\QĀ\E-\QŐ\E/utf
|
||||||
|
Failed: error 106 at offset 15: missing terminating ] for character class
|
||||||
|
|
||||||
|
/[\p{L}]/
|
||||||
|
Memory allocation (code space): 15
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 11 Bra
|
||||||
|
3 [\p{L}]
|
||||||
|
11 11 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{^L}]/
|
||||||
|
Memory allocation (code space): 15
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 11 Bra
|
||||||
|
3 [\P{L}]
|
||||||
|
11 11 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\P{L}]/
|
||||||
|
Memory allocation (code space): 15
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 11 Bra
|
||||||
|
3 [\P{L}]
|
||||||
|
11 11 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\P{^L}]/
|
||||||
|
Memory allocation (code space): 15
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 11 Bra
|
||||||
|
3 [\p{L}]
|
||||||
|
11 11 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[abc\p{L}\x{0660}]/utf
|
||||||
|
Memory allocation (code space): 50
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 46 Bra
|
||||||
|
3 [a-c\p{L}\x{660}]
|
||||||
|
46 46 Ket
|
||||||
|
49 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{Nd}]/utf
|
||||||
|
Memory allocation (code space): 15
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 11 Bra
|
||||||
|
3 [\p{Nd}]
|
||||||
|
11 11 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\p{Nd}+-]+/utf
|
||||||
|
Memory allocation (code space): 48
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 44 Bra
|
||||||
|
3 [+\-\p{Nd}]++
|
||||||
|
44 44 Ket
|
||||||
|
47 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
|
||||||
|
Memory allocation (code space): 25
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 21 Bra
|
||||||
|
3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||||
|
21 21 Ket
|
||||||
|
24 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
|
||||||
|
Memory allocation (code space): 25
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 21 Bra
|
||||||
|
3 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||||
|
21 21 Ket
|
||||||
|
24 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\x{105}-\x{109}]/i,utf
|
||||||
|
Memory allocation (code space): 17
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 13 Bra
|
||||||
|
3 [\x{104}-\x{109}]
|
||||||
|
13 13 Ket
|
||||||
|
16 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/( ( (?(1)0|) )* )/x
|
||||||
|
Memory allocation (code space): 38
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 34 Bra
|
||||||
|
3 28 CBra 1
|
||||||
|
8 Brazero
|
||||||
|
9 19 SCBra 2
|
||||||
|
14 8 Cond
|
||||||
|
17 1 Cond ref
|
||||||
|
20 0
|
||||||
|
22 3 Alt
|
||||||
|
25 11 Ket
|
||||||
|
28 19 KetRmax
|
||||||
|
31 28 Ket
|
||||||
|
34 34 Ket
|
||||||
|
37 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/( (?(1)0|)* )/x
|
||||||
|
Memory allocation (code space): 30
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 26 Bra
|
||||||
|
3 20 CBra 1
|
||||||
|
8 Brazero
|
||||||
|
9 8 SCond
|
||||||
|
12 1 Cond ref
|
||||||
|
15 0
|
||||||
|
17 3 Alt
|
||||||
|
20 11 KetRmax
|
||||||
|
23 20 Ket
|
||||||
|
26 26 Ket
|
||||||
|
29 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[a]/
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 a
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[a]/utf
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 a
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\xaa]/
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 \x{aa}
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\xaa]/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 \x{aa}
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^a]/
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 [^a]
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^a]/utf
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 [^a]
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\xaa]/
|
||||||
|
Memory allocation (code space): 9
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 5 Bra
|
||||||
|
3 [^\x{aa}]
|
||||||
|
5 5 Ket
|
||||||
|
8 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\xaa]/utf
|
||||||
|
Memory allocation (code space): 10
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 6 Bra
|
||||||
|
3 [^\x{aa}]
|
||||||
|
6 6 Ket
|
||||||
|
9 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
#pattern -memory
|
||||||
|
|
||||||
|
/[^\d]/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 11 Bra
|
||||||
|
3 [^\p{Nd}]
|
||||||
|
11 11 Ket
|
||||||
|
14 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 44 Bra
|
||||||
|
3 [ -~\x80-\xff\P{L}]++
|
||||||
|
44 44 Ket
|
||||||
|
47 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 44 Bra
|
||||||
|
3 [ -~\x80-\xff\P{L}]++
|
||||||
|
44 44 Ket
|
||||||
|
47 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:alpha:]]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 12 Bra
|
||||||
|
3 [\p{L}]++
|
||||||
|
12 12 Ket
|
||||||
|
15 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[[:^alpha:]\S]+/utf,ucp
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 15 Bra
|
||||||
|
3 [\P{L}\P{Xsp}]++
|
||||||
|
15 15 Ket
|
||||||
|
18 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 73 Bra
|
||||||
|
3 abc
|
||||||
|
9 7 CBra 1
|
||||||
|
14 d
|
||||||
|
16 5 Alt
|
||||||
|
19 e
|
||||||
|
21 12 Ket
|
||||||
|
24 *THEN
|
||||||
|
25 x
|
||||||
|
27 14 CBra 2
|
||||||
|
32 123
|
||||||
|
38 *THEN
|
||||||
|
39 4
|
||||||
|
41 29 Alt
|
||||||
|
44 567
|
||||||
|
50 7 CBra 3
|
||||||
|
55 b
|
||||||
|
57 5 Alt
|
||||||
|
60 q
|
||||||
|
62 12 Ket
|
||||||
|
65 *THEN
|
||||||
|
66 xx
|
||||||
|
70 43 Ket
|
||||||
|
73 73 Ket
|
||||||
|
76 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(((a\2)|(a*)\g<-1>))*a?/
|
||||||
|
------------------------------------------------------------------
|
||||||
|
0 57 Bra
|
||||||
|
3 Brazero
|
||||||
|
4 48 SCBra 1
|
||||||
|
9 40 Once
|
||||||
|
12 18 CBra 2
|
||||||
|
17 10 CBra 3
|
||||||
|
22 a
|
||||||
|
24 \2
|
||||||
|
27 10 Ket
|
||||||
|
30 16 Alt
|
||||||
|
33 7 CBra 4
|
||||||
|
38 a*
|
||||||
|
40 7 Ket
|
||||||
|
43 33 Recurse
|
||||||
|
46 34 Ket
|
||||||
|
49 40 Ket
|
||||||
|
52 48 KetRmax
|
||||||
|
55 a?+
|
||||||
|
57 57 Ket
|
||||||
|
60 End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# End of testinput8
|
|
@ -0,0 +1,498 @@
|
||||||
|
# This set of tests is run only with the 8-bit library. They do not require
|
||||||
|
# UTF-8 or Unicode property support. The file starts with all the tests of
|
||||||
|
# the POSIX interface, because that is supported only with the 8-bit library.
|
||||||
|
|
||||||
|
#forbid_utf
|
||||||
|
#pattern posix
|
||||||
|
|
||||||
|
/abc/
|
||||||
|
abc
|
||||||
|
0: abc
|
||||||
|
*** Failers
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/^abc|def/
|
||||||
|
abcdef
|
||||||
|
0: abc
|
||||||
|
abcdef\=notbol
|
||||||
|
0: def
|
||||||
|
|
||||||
|
/.*((abc)$|(def))/
|
||||||
|
defabc
|
||||||
|
0: defabc
|
||||||
|
1: abc
|
||||||
|
2: abc
|
||||||
|
defabc\=noteol
|
||||||
|
0: def
|
||||||
|
1: def
|
||||||
|
3: def
|
||||||
|
|
||||||
|
/the quick brown fox/
|
||||||
|
the quick brown fox
|
||||||
|
0: the quick brown fox
|
||||||
|
*** Failers
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
The Quick Brown Fox
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/the quick brown fox/i
|
||||||
|
the quick brown fox
|
||||||
|
0: the quick brown fox
|
||||||
|
The Quick Brown Fox
|
||||||
|
0: The Quick Brown Fox
|
||||||
|
|
||||||
|
/abc.def/
|
||||||
|
*** Failers
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
abc\ndef
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/abc$/
|
||||||
|
abc
|
||||||
|
0: abc
|
||||||
|
abc\n
|
||||||
|
0: abc
|
||||||
|
|
||||||
|
/(abc)\2/
|
||||||
|
Failed: POSIX code 15: bad back reference at offset 7
|
||||||
|
|
||||||
|
/(abc\1)/
|
||||||
|
abc
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/a*(b+)(z)(z)/
|
||||||
|
aaaabbbbzzzz
|
||||||
|
0: aaaabbbbzz
|
||||||
|
1: bbbb
|
||||||
|
2: z
|
||||||
|
3: z
|
||||||
|
aaaabbbbzzzz\=ovector=0
|
||||||
|
Matched without capture
|
||||||
|
aaaabbbbzzzz\=ovector=1
|
||||||
|
0: aaaabbbbzz
|
||||||
|
aaaabbbbzzzz\=ovector=2
|
||||||
|
0: aaaabbbbzz
|
||||||
|
1: bbbb
|
||||||
|
|
||||||
|
/ab.cd/
|
||||||
|
ab-cd
|
||||||
|
0: ab-cd
|
||||||
|
ab=cd
|
||||||
|
0: ab=cd
|
||||||
|
** Failers
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
ab\ncd
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/ab.cd/s
|
||||||
|
ab-cd
|
||||||
|
0: ab-cd
|
||||||
|
ab=cd
|
||||||
|
0: ab=cd
|
||||||
|
ab\ncd
|
||||||
|
0: ab\x0acd
|
||||||
|
|
||||||
|
/a(b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
Matched with REG_NOSUB
|
||||||
|
|
||||||
|
/a(?P<name>b)c/no_auto_capture
|
||||||
|
abc
|
||||||
|
Matched with REG_NOSUB
|
||||||
|
|
||||||
|
/a?|b?/
|
||||||
|
abc
|
||||||
|
0: a
|
||||||
|
** Failers
|
||||||
|
0:
|
||||||
|
ddd\=notempty
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/\w+A/
|
||||||
|
CDAAAAB
|
||||||
|
0: CDAAAA
|
||||||
|
|
||||||
|
/\w+A/ungreedy
|
||||||
|
CDAAAAB
|
||||||
|
0: CDA
|
||||||
|
|
||||||
|
/\Biss\B/I,aftertext
|
||||||
|
** Ignored with POSIX interface: info
|
||||||
|
Mississippi
|
||||||
|
0: iss
|
||||||
|
0+ issippi
|
||||||
|
|
||||||
|
/abc/\
|
||||||
|
Failed: POSIX code 9: bad escape sequence at offset 4
|
||||||
|
|
||||||
|
#pattern -posix
|
||||||
|
|
||||||
|
# End of POSIX tests
|
||||||
|
|
||||||
|
/a\Cb/
|
||||||
|
aXb
|
||||||
|
0: aXb
|
||||||
|
a\nb
|
||||||
|
0: a\x0ab
|
||||||
|
** Failers (too big char)
|
||||||
|
No match
|
||||||
|
A\x{123}B
|
||||||
|
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
|
||||||
|
** Truncation will probably give the wrong result.
|
||||||
|
No match
|
||||||
|
A\o{443}B
|
||||||
|
** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
|
||||||
|
** Truncation will probably give the wrong result.
|
||||||
|
No match
|
||||||
|
|
||||||
|
/\x{100}/I
|
||||||
|
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/\o{400}/I
|
||||||
|
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional leading comment
|
||||||
|
(?: (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address
|
||||||
|
| # or
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # one word, optionally followed by....
|
||||||
|
(?:
|
||||||
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||||
|
\(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) | # comments, or...
|
||||||
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
# quoted strings
|
||||||
|
)*
|
||||||
|
< (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # leading <
|
||||||
|
(?: @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* , (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
)* # further okay, if led by comma
|
||||||
|
: # closing colon
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* )? # optional route
|
||||||
|
(?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) # initial word
|
||||||
|
(?: (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
|
|
||||||
|
" (?: # opening quote...
|
||||||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||||
|
| # or
|
||||||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||||
|
)* " # closing quote
|
||||||
|
) )* # further okay, if led by a period
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* @ (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # initial subdomain
|
||||||
|
(?: #
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* \. # if led by a period...
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* (?:
|
||||||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||||
|
| \[ # [
|
||||||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||||
|
\] # ]
|
||||||
|
) # ...further okay
|
||||||
|
)*
|
||||||
|
# address spec
|
||||||
|
(?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* > # trailing >
|
||||||
|
# name and address
|
||||||
|
) (?: [\040\t] | \(
|
||||||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||||
|
\) )* # optional trailing comment
|
||||||
|
/Ix
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
Contains explicit CR or LF match
|
||||||
|
Options: extended
|
||||||
|
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
|
||||||
|
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
|
||||||
|
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/\h/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x09 \x20 \xa0
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/\H/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/\v/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/\V/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
No first code unit
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/\R/I
|
||||||
|
Capturing subpattern count = 0
|
||||||
|
No options
|
||||||
|
Starting code units: \x0a \x0b \x0c \x0d \x85
|
||||||
|
No last code unit
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/[\h]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
>\x09<
|
||||||
|
0: \x09
|
||||||
|
|
||||||
|
/[\h]+/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x09 \xa0]++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
>\x09\x20\xa0<
|
||||||
|
0: \x09 \xa0
|
||||||
|
|
||||||
|
/[\v]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x0a-\x0d\x85]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\H]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\h]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\V]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x09\x0e-\x84\x86-\xff]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[\x0a\V]/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x0a\x0e-\x84\x86-\xff]
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/\777/I
|
||||||
|
Failed: error 151 at offset 3: octal value is greater than \377 in 8-bit non-UTF-8 mode
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||||||
|
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||||
|
XX
|
||||||
|
|
||||||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||||||
|
XX
|
||||||
|
0: XX
|
||||||
|
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
|
||||||
|
|
||||||
|
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||||
|
Failed: error 177 at offset 5: character code point value in \u.... sequence is too large
|
||||||
|
|
||||||
|
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||||
|
Failed: error 177 at offset 6: character code point value in \u.... sequence is too large
|
||||||
|
|
||||||
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[b-\xff] (neg){12,}+
|
||||||
|
[\x00-a] (neg)*+
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
[\x00-\x08\x0e-\x1f!-\xff] (neg)*+
|
||||||
|
\s*
|
||||||
|
|
||||||
|
[0-9A-Z_a-z]++
|
||||||
|
\W+
|
||||||
|
|
||||||
|
[\x00-/:-\xff] (neg)*+
|
||||||
|
\d
|
||||||
|
0
|
||||||
|
[\x00-/:-@[-^`{-\xff] (neg){4,6}+
|
||||||
|
\w*
|
||||||
|
A
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# End of testinput9
|
Loading…
Reference in New Issue