Tests 1 and 2 are converted (but without save/restore).

This commit is contained in:
Philip.Hazel 2014-07-24 16:32:38 +00:00
parent 1701838220
commit 017b6a1624
12 changed files with 35118 additions and 217 deletions

995
RunTest Executable file
View File

@ -0,0 +1,995 @@
#! /bin/sh
###############################################################################
# Run the PCRE2 tests using the pcre2test program. The appropriate tests are
# selected, depending on which build-time options were used.
#
# When JIT support is available, all appropriate tests are run with and without
# JIT, unless "nojit" is given on the command line. There are also two tests
# for JIT-specific features, one to be run when JIT support is available
# (unless "nojit" is specified), and one when it is not.
#
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
# possible to select which to test by giving "-8", "-16" or "-32" on the
# command line.
#
# As well as "nojit", "-8", "-16", and "-32", arguments for this script are
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
# except test 10. Whatever order the arguments are in, the tests are always run
# in numerical order.
#
# The special argument "3S" runs test 3, stopping if it fails. Test 3 is the
# locale test, and failure usually means there's an issue with the locale
# rather than a bug in PCRE2, so normally subsequent tests are run. "3S" is
# useful when you want to debug or update the test.
#
# Inappropriate tests are automatically skipped (with a comment to say so): for
# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
# support is compiled, test 13 is skipped.
#
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
# followed by an argument to run cross-compiled executables under a simulator,
# for example:
#
# RunTest 3 sim "qemu-arm -s 8388608"
#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
# EBCDIC test that can be useful for checking certain EBCDIC features, even
# when run in an ASCII environment.
#
# If the script is obeyed as "RunTest list", a list of available tests is
# output, but none of them are run.
###############################################################################
# Define test titles in variables so that they can be output as a list. Some
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
# Titles of the tests that are currently enabled.
title1="Test 1: Main functionality (Compatible with Perl >= 5.10)"
title2="Test 2: API, errors, internals, and non-Perl stuff"
# Titles for tests 3 to 26 are kept as comments; they are referenced only by
# the commented-out test code further down in this script.
#title3="Test 3: Locale-specific features"
#title4A="Test 4: UTF"
#title4B=" support (Compatible with Perl >= 5.10)"
#title5="Test 5: API, internals, and non-Perl stuff for UTF"
#title6="Test 6: Unicode property support (Compatible with Perl >= 5.10)"
#title7="Test 7: API, internals, and non-Perl stuff for Unicode property support"
#title8="Test 8: DFA matching main functionality"
#title9="Test 9: DFA matching with UTF"
#title10="Test 10: DFA matching with Unicode properties"
#title11="Test 11: Internal offsets and code size tests"
#title12="Test 12: JIT-specific features (when JIT is available)"
#title13="Test 13: JIT-specific features (when JIT is not available)"
#title14="Test 14: Specials for the basic 8-bit library"
#title15="Test 15: Specials for the 8-bit library with UTF-8 support"
#title16="Test 16: Specials for the 8-bit library with Unicode property support"
#title17="Test 17: Specials for the basic 16/32-bit library"
#title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
#title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
#title20="Test 20: DFA specials for the basic 16/32-bit library"
#title21="Test 21: Reloads for the basic 16/32-bit library"
#title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
#title23="Test 23: Specials for the 16-bit library"
#title24="Test 24: Specials for the 16-bit library with UTF-16 support"
#title25="Test 25: Specials for the 32-bit library"
#title26="Test 26: Specials for the 32-bit library with UTF-32 support"
# Highest test number accepted in a range argument; an open-ended range such
# as "1-" is taken to end at this number.
maxtest=2
# "RunTest list": print the title of every available test, one per line, and
# exit with status 0 without running anything. Only recognised when "list" is
# the sole argument.
if [ $# -eq 1 ] && [ "$1" = "list" ]; then
  echo "$title1"
  echo "$title2 (not UTF)"
# echo $title3
# echo $title4A $title4B
# echo $title5 support
# echo $title6
# echo $title7
# echo $title8
# echo $title9
# echo $title10
# echo $title11
# echo $title12
# echo $title13
# echo $title14
# echo $title15
# echo $title16
# echo $title17
# echo $title18
# echo $title19
# echo $title20
# echo $title21
# echo $title22
# echo $title23
# echo $title24
# echo $title25
# echo $title26
  exit 0
fi
# Choose the command used to compare actual and expected output. A unified
# diff is preferred, but some systems have a diff that lacks -u, so probe for
# it on two empty inputs and fall back to plain "diff" if the probe fails.
if diff -u /dev/null /dev/null 2>/dev/null; then
  cf="diff -u"
else
  cf="diff"
fi
# Locate the directory that holds the test input and expected-output files.
# $srcdir takes precedence when it names an existing directory (its testdata
# subdirectory is not itself checked); otherwise ./testdata and then
# ../testdata are tried. Give up with status 1 if none applies.
testdata=
if [ -n "$srcdir" ] && [ -d "$srcdir" ] ; then
  testdata="$srcdir/testdata"
else
  for trydir in ./testdata ../testdata; do
    if [ -d "$trydir" ] ; then
      testdata=$trydir
      break
    fi
  done
fi
if [ -z "$testdata" ] ; then
  echo "Cannot find the testdata directory"
  exit 1
fi
# ------ Special EBCDIC Test -------
# Runs only when "ebcdic" is the sole argument. The EBCDIC test file is run
# twice, once normally and once with -dfa, and each output is compared with
# the expected output. The script always exits inside this block: status 0 if
# both runs match, status 1 otherwise.
if [ $# -eq 1 ] && [ "$1" = "ebcdic" ]; then
  # The answer comes back as the exit status; anything other than 1 is taken
  # to mean that EBCDIC support was not compiled in.
  ./pcre2test -C ebcdic >/dev/null
  ebcdic=$?
  if [ $ebcdic -ne 1 ] ; then
    echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
    exit 1
  fi
  for opt in "" "-dfa"; do
    # $opt and $cf are deliberately unquoted: an empty $opt must vanish and
    # $cf may be "diff -u". The data paths are quoted so that a testdata
    # directory whose name contains white space still works.
    ./pcre2test -q $opt "$testdata/testinputEBC" >testtry || exit 1
    $cf "$testdata/testoutputEBC" testtry || exit 1
    if [ "$opt" = "-dfa" ] ; then echo " OK using DFA"
    else echo " OK"
    fi
  done
  exit 0
fi
# ------ Normal Tests ------

# Option state, all initially empty. arg8/arg16/arg32 record an explicit
# library selection, nojit disables the JIT runs, sim holds a simulator
# command that is prefixed to every pcre2test invocation, skip collects the
# numbers of excluded tests, and valgrind holds a valgrind command prefix.
arg8= arg16= arg32=
nojit= sim= skip= valgrind=

# This is in case the caller has set aliases (as I do - PH)
# NOTE(review): "unset" removes variables (and in some shells functions), not
# aliases; "unalias" may have been intended - confirm before changing.
unset cp ls mv rm

# One doN flag per test, "yes" when the test is to be run. Everything starts
# as "no"; the command line arguments are processed next to select tests and
# options.
do1=no do2=no
#do3=no
#do4=no
#do5=no
#do6=no
#do7=no
#do8=no
#do9=no
#do10=no
#do11=no
#do12=no
#do13=no
#do14=no
#do15=no
#do16=no
#do17=no
#do18=no
#do19=no
#do20=no
#do21=no
#do22=no
#do23=no
#do24=no
#do25=no
#do26=no
# select_tests ARG...
#   Interpret the command line arguments:
#     N             set doN=yes for an individual test
#     N-M or N-     set doN=yes for each test in the range (N- runs to $maxtest)
#     ~N            append N to $skip (the exclusion is applied later)
#     -8 -16 -32    set arg8 / arg16 / arg32 to "yes"
#     nojit         set nojit=yes
#     sim CMD       store CMD in $sim
#     valgrind, valgrind-log   store a valgrind command prefix in $valgrind
#   Anything else, a range outside 1..$maxtest, a range whose start is above
#   its end, or "sim" without a following argument prints a message and exits
#   with status 1.
#   The arguments are consumed inside the function, so the script's own
#   positional parameters are left untouched.
select_tests() {
  while [ $# -gt 0 ] ; do
    case $1 in
      1) do1=yes;;
      2) do2=yes;;
#     3) do3=yes;;
#     4) do4=yes;;
#     5) do5=yes;;
#     6) do6=yes;;
#     7) do7=yes;;
#     8) do8=yes;;
#     9) do9=yes;;
#     10) do10=yes;;
#     11) do11=yes;;
#     12) do12=yes;;
#     13) do13=yes;;
#     14) do14=yes;;
#     15) do15=yes;;
#     16) do16=yes;;
#     17) do17=yes;;
#     18) do18=yes;;
#     19) do19=yes;;
#     20) do20=yes;;
#     21) do21=yes;;
#     22) do22=yes;;
#     23) do23=yes;;
#     24) do24=yes;;
#     25) do25=yes;;
#     26) do26=yes;;
      -8) arg8=yes;;
      -16) arg16=yes;;
      -32) arg32=yes;;
      nojit) nojit=yes;;
      sim)
        # Without this check a trailing "sim" would leave $sim empty and make
        # the shift at the bottom of the loop fail.
        if [ $# -lt 2 ] ; then
          echo "Missing simulator command after 'sim'"; exit 1
        fi
        shift; sim=$1;;
      valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
      valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
      ~*)
        if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
          skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
        else
          echo "Unknown option or test selector '$1'"; exit 1
        fi
        ;;
      *-*)
        if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
          tf=`expr "$1" : '\([0-9]*\)'`
          tt=`expr "$1" : '.*-\([0-9]*\)'`
          if [ "$tt" = "" ] ; then tt=$maxtest; fi
          # A start above the end (for example "3-" when maxtest is 2) would
          # select nothing, and "nothing selected" is later taken to mean
          # "run everything", so reject it here.
          if expr \( "$tf" "<" 1 \) \| \( "$tt" ">" "$maxtest" \) \| \( "$tf" ">" "$tt" \) >/dev/null; then
            echo "Invalid test range '$1'"; exit 1
          fi
          while expr "$tf" "<=" "$tt" >/dev/null; do
            eval do${tf}=yes
            tf=`expr $tf + 1`
          done
        else
          echo "Invalid test range '$1'"; exit 1
        fi
        ;;
      *) echo "Unknown option or test selector '$1'"; exit 1;;
    esac
    shift
  done
}

select_tests "$@"
# Find which optional facilities are available.

# The internal link size is read from the exit status of "pcre2test -C
# linksize". Only 2, 3 and 4 are accepted; any other status stops the script
# with status 1.
$sim ./pcre2test -C linksize >/dev/null
link_size=$?
case $link_size in
  2|3|4) ;;
  *) echo "Failed to find internal link size"
     exit 1;;
esac
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
# one need be. Each "-C" probe answers through its exit status: non-zero means
# that library was built.
$sim ./pcre2test -C pcre8 >/dev/null
support8=$?
$sim ./pcre2test -C pcre16 >/dev/null
support16=$?
$sim ./pcre2test -C pcre32 >/dev/null
support32=$?

# testN is "skip" when that library is not to be tested; otherwise it holds
# the option passed to pcre2test to select the library (empty for 8-bit).
test8=skip test16=skip test32=skip

if [ -z "$arg8$arg16$arg32" ] ; then
  # No bit size was requested: test every library that is available.
  if [ $support8 -ne 0 ] ; then test8= ; fi
  if [ $support16 -ne 0 ] ; then test16=-16; fi
  if [ $support32 -ne 0 ] ; then test32=-32; fi
else
  # Explicit requests: asking for a library that was not built is an error.
  case "$arg8:$support8" in
    yes:0) echo "Cannot run 8-bit library tests: 8-bit library not compiled"
           exit 1;;
    yes:*) test8=;;
  esac
  case "$arg16:$support16" in
    yes:0) echo "Cannot run 16-bit library tests: 16-bit library not compiled"
           exit 1;;
    yes:*) test16=-16;;
  esac
  case "$arg32:$support32" in
    yes:0) echo "Cannot run 32-bit library tests: 32-bit library not compiled"
           exit 1;;
    yes:*) test32=-32;;
  esac
fi
# UTF support applies to every bit size that is built; we can't have UTF-8
# support without UTF-16 support (for example). The probe result is kept in
# $utf as an exit status.
$sim ./pcre2test -C utf >/dev/null
utf=$?

# JIT availability is probed the same way. $jitopt becomes "-jit" only when
# the probe status is non-zero and "nojit" was not given; otherwise it stays
# empty.
$sim ./pcre2test -C jit >/dev/null
jit=$?
jitopt=
if [ "$nojit" != "yes" ] ; then
  if [ $jit -ne 0 ] ; then jitopt=-jit; fi
fi
# When the command line named no test at all, every test is selected. Tests
# that do not apply to this build are skipped automatically when their turn
# comes.
if [ "$do1" = no ] && [ "$do2" = no ] ; then
# Conditions for the tests that are currently disabled:
# -a $do3 = no -a $do4 = no -a \
# $do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
# $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
# $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
# $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
# $do25 = no -a $do26 = no
  do1=yes
  do2=yes
# do3=yes
# do4=yes
# do5=yes
# do6=yes
# do7=yes
# do8=yes
# do9=yes
# do10=yes
# do11=yes
# do12=yes
# do13=yes
# do14=yes
# do15=yes
# do16=yes
# do17=yes
# do18=yes
# do19=yes
# do20=yes
# do21=yes
# do22=yes
# do23=yes
# do24=yes
# do25=yes
# do26=yes
fi

# Explicit "~N" exclusions are applied only now, after the select-all step, so
# that an argument list consisting solely of exclusions means "everything
# except these".
for excluded in $skip; do
  eval do$excluded=no
done
# Print a heading that names the test data directory, then run pcre2test on
# an empty input so that it shows which release is being tested.
echo ""
# $testdata stays unquoted so the heading is word-split exactly as before.
echo "PCRE2 C library tests using test data from" $testdata
$sim ./pcre2test /dev/null
echo ""
for bmode in "$test8" "$test16" "$test32"; do
# Work out which library this pass of the loop tests and print its heading.
# $bmode is "skip" (go straight to the next pass), "-16", "-32", or empty for
# the 8-bit library. A blank separator line is printed first unless both of
# the other libraries are being skipped; the 8-bit heading never has one.
case "$bmode" in
  skip) continue;;
  -16)  bits=16; otherlibs="$test8$test32";;
  -32)  bits=32; otherlibs="$test8$test16";;
  *)    bits=8;  otherlibs=skipskip;;
esac
if [ "$otherlibs" != "skipskip" ] ; then echo ""; fi
echo "---- Testing $bits-bit library ----"; echo ""
# Primary test, compatible with JIT and with Perl >= 5.10 (as stated in the
# test's title). The test input is run once without JIT and, when $jitopt is
# set, once more with it; each output is compared with the expected output.
# Any failure of pcre2test or any difference stops the script with status 1.
if [ "$do1" = yes ] ; then
  echo $title1
  for opt in "" $jitopt; do
    # $sim, $valgrind, $bmode, $opt and $cf are deliberately unquoted: each
    # may be empty or hold several words. The data paths are quoted so that a
    # testdata directory whose name contains white space still works.
    $sim $valgrind ./pcre2test -q $bmode $opt "$testdata/testinput1" testtry || exit 1
    $cf "$testdata/testoutput1" testtry || exit 1
    if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
    else echo " OK"
    fi
  done
fi
# PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals.
# Run once without JIT and, when $jitopt is set, once more with it. A
# difference from the expected output, or a failure of pcre2test itself,
# stops the script with status 1; in the latter case a hint about stack usage
# is printed first.
if [ "$do2" = yes ] ; then
  echo $title2 "(not UTF-$bits)"
  for opt in "" $jitopt; do
    # $sim, $valgrind, $bmode, $opt and $cf are deliberately unquoted: each
    # may be empty or hold several words. The data paths are quoted so that a
    # testdata directory whose name contains white space still works.
    if $sim $valgrind ./pcre2test -q $bmode $opt "$testdata/testinput2" testtry; then
      $cf "$testdata/testoutput2" testtry || exit 1
    else
      echo " "
      echo "** Test 2 requires a lot of stack. If it has crashed with a"
      echo "** segmentation fault, it may be that you do not have enough"
      echo "** stack available by default. Please see the 'pcre2stack' man"
      echo "** page for a discussion of PCRE2's stack usage."
      echo " "
      exit 1
    fi
    if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
    else echo " OK"
    fi
  done
fi
## Locale-specific tests, provided that either the "fr_FR" or the "french"
## locale is available. The former is the Unix-like standard; the latter is
## for Windows. Another possibility is "fr". Unfortunately, different versions
## of the French locale give different outputs for some items. This test passes
## if the output matches any one of the alternative output files.
#
#if [ $do3 = yes ] ; then
# locale -a | grep '^fr_FR$' >/dev/null
# if [ $? -eq 0 ] ; then
# locale=fr_FR
# infile=$testdata/testinput3
# outfile=$testdata/testoutput3
# outfile2=$testdata/testoutput3A
# outfile3=$testdata/testoutput3B
# else
# infile=test3input
# outfile=test3output
# outfile2=test3outputA
# outfile3=test3outputB
# locale -a | grep '^french$' >/dev/null
# if [ $? -eq 0 ] ; then
# locale=french
# sed 's/fr_FR/french/' $testdata/testinput3 >test3input
# sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
# sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA
# sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB
# else
# locale -a | grep '^fr$' >/dev/null
# if [ $? -eq 0 ] ; then
# locale=fr
# sed 's/fr_FR/fr/' $testdata/testinput3 >test3input
# sed 's/fr_FR/fr/' $testdata/testoutput3 >test3output
# sed 's/fr_FR/fr/' $testdata/testoutput3A >test3outputA
# sed 's/fr_FR/fr/' $testdata/testoutput3B >test3outputB
# else
# locale=
# fi
# fi
# fi
#
# if [ "$locale" != "" ] ; then
# echo $title3 "(using '$locale' locale)"
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
# if [ $? = 0 ] ; then
# if $cf $outfile testtry >teststdout || \
# $cf $outfile2 testtry >teststdout || \
# $cf $outfile3 testtry >teststdout
# then
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# else
# echo "** Locale test did not run successfully. The output did not match"
# echo " $outfile, $outfile2 or $outfile3."
# echo " This may mean that there is a problem with the locale settings rather"
# echo " than a bug in PCRE."
# exit 1
# fi
# else exit 1
# fi
# done
# else
# echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr' or"
# echo "'french' locales exist, or the \"locale\" command is not available"
# echo "to check for them."
# echo " "
# fi
#fi
#
## Additional tests for UTF support
#
#if [ $do4 = yes ] ; then
# echo ${title4A}-${bits}${title4B}
# if [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput4 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
#if [ $do5 = yes ] ; then
# echo ${title5}-${bits} support
# if [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput5 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
#if [ $do6 = yes ] ; then
# echo $title6
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput6 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput6 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Test non-Perl-compatible Unicode property support
#
#if [ $do7 = yes ] ; then
# echo $title7
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput7 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for DFA matching support
#
#if [ $do8 = yes ] ; then
# echo $title8
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput8 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput8 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
#fi
#
#if [ $do9 = yes ] ; then
# echo ${title9}-${bits}
# if [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput9 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput9 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
# fi
#fi
#
#if [ $do10 = yes ] ; then
# echo $title10
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput10 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput10 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
# fi
#fi
#
## Test of internal offsets and code sizes. This test is run only when there
## is Unicode property support and the link size is 2. The actual tests are
## mostly the same as in some of the above, but in this test we inspect some
## offsets and sizes that require a known link size. This is a doublecheck for
## the maintainer, just in case something changes unexpectedly. The output from
## this test is not the same in 8-bit and 16-bit modes.
#
#if [ $do11 = yes ] ; then
# echo $title11
# if [ $link_size -ne 2 ] ; then
# echo " Skipped because link size is not 2"
# elif [ $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput11-$bits testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
# fi
#fi
#
## Test JIT-specific features when JIT is available
#
#if [ $do12 = yes ] ; then
# echo $title12
# if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
# echo " Skipped because JIT is not available or not usable"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput12 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput12 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
## Test JIT-specific features when JIT is not available
#
#if [ $do13 = yes ] ; then
# echo $title13
# if [ $jit -ne 0 ] ; then
# echo " Skipped because JIT is available"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput13 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
## Tests for 8-bit-specific features
#
#if [ "$do14" = yes ] ; then
# echo $title14
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
# echo " Skipped when running 16/32-bit tests"
# else
# cp -f $testdata/saved16 testsaved16
# cp -f $testdata/saved32 testsaved32
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput14 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput14 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 8-bit-specific features (needs UTF-8 support)
#
#if [ "$do15" = yes ] ; then
# echo $title15
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
# echo " Skipped when running 16/32-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput15 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput15 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 8-bit-specific features (Unicode property support)
#
#if [ $do16 = yes ] ; then
# echo $title16
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
# echo " Skipped when running 16/32-bit tests"
# elif [ $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput16 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput16 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features
#
#if [ $do17 = yes ] ; then
# echo $title17
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput17 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput17 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features (UTF-16/32 support)
#
#if [ $do18 = yes ] ; then
# echo $title18
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput18 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput18-$bits testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features (Unicode property support)
#
#if [ $do19 = yes ] ; then
# echo $title19
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# elif [ $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput19 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput19 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
#
#if [ $do20 = yes ] ; then
# echo $title20
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput20 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput20 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for reloads with 16/32-bit library
#
#if [ $do21 = yes ] ; then
# echo $title21
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# elif [ $link_size -ne 2 ] ; then
# echo " Skipped because link size is not 2"
# else
# cp -f $testdata/saved8 testsaved8
# cp -f $testdata/saved16LE-1 testsaved16LE-1
# cp -f $testdata/saved16BE-1 testsaved16BE-1
# cp -f $testdata/saved32LE-1 testsaved32LE-1
# cp -f $testdata/saved32BE-1 testsaved32BE-1
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput21 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput21-$bits testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
## Tests for reloads with 16/32-bit library (UTF-16 support)
#
#if [ $do22 = yes ] ; then
# echo $title22
# if [ "$bits" = "8" ] ; then
# echo " Skipped when running 8-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# elif [ $link_size -ne 2 ] ; then
# echo " Skipped because link size is not 2"
# else
# cp -f $testdata/saved16LE-2 testsaved16LE-2
# cp -f $testdata/saved16BE-2 testsaved16BE-2
# cp -f $testdata/saved32LE-2 testsaved32LE-2
# cp -f $testdata/saved32BE-2 testsaved32BE-2
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput22 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput22-$bits testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
#if [ $do23 = yes ] ; then
# echo $title23
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
# echo " Skipped when running 8/32-bit tests"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput23 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput23 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
#if [ $do24 = yes ] ; then
# echo $title24
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
# echo " Skipped when running 8/32-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput24 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput24 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
#if [ $do25 = yes ] ; then
# echo $title25
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
# echo " Skipped when running 8/16-bit tests"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput25 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput25 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
#
#if [ $do26 = yes ] ; then
# echo $title26
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
# echo " Skipped when running 8/16-bit tests"
# elif [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput26 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput26 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# echo " OK"
# fi
#fi
# End of loop for 8/16/32-bit tests
done
# Clean up local working files. test3outputB is included because the
# (currently commented-out) locale test writes it alongside test3output and
# test3outputA; rm -f ignores any file that does not exist.
rm -f test3input test3output test3outputA test3outputB testNinput testsaved* \
  teststderr teststdout testtry
# End

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "05 July 2014" "PCRE 10.00" .TH PCRE2TEST 1 "22 July 2014" "PCRE 10.00"
.SH NAME .SH NAME
pcre2test - a program for testing Perl-compatible regular expressions. pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -141,6 +141,10 @@ Output a brief summary these options and then exit.
Behave as if each pattern has the \fB/info\fP modifier; information about the Behave as if each pattern has the \fB/info\fP modifier; information about the
compiled pattern is given after compilation. compiled pattern is given after compilation.
.TP 10 .TP 10
\fB-jit\fP
Behave as if each pattern line has the \fBjit\fP modifier; after successful
compilation, each pattern is passed to the just-in-time compiler, if available.
.TP 10
\fB-pattern\fB \fImodifier-list\fP \fB-pattern\fB \fImodifier-list\fP
Behave as if each pattern line contains the given modifiers. Behave as if each pattern line contains the given modifiers.
.TP 10 .TP 10
@ -216,6 +220,17 @@ In between sets of test data, a line that begins with a hash (#) character is
interpreted as a command line. If the first character is followed by white interpreted as a command line. If the first character is followed by white
space or an exclamation mark, the line is treated as a comment, and ignored. space or an exclamation mark, the line is treated as a comment, and ignored.
Otherwise, the following commands are recognized: Otherwise, the following commands are recognized:
.sp
#forbid_utf
.sp
Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
options set, which locks out the use of UTF and Unicode property features. This
is a trigger guard that is used in test files to ensure that UTF/Unicode tests
are not accidentally added to files that are used when UTF support is not
included in the library. This effect can also be obtained by the use of
\fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be unset, and
the automatic options are not displayed in pattern information, to avoid
cluttering up test output.
.sp .sp
#load <file name> #load <file name>
.sp .sp
@ -358,10 +373,11 @@ the start of a modifier list. For example:
.sp .sp
abc\=notbol,notempty abc\=notbol,notempty
.sp .sp
A backslash followed by anything else causes an error. However, if the very A backslash followed by any other non-alphanumeric character just escapes that
last character in the line is a backslash (and there is no modifier list), it character. A backslash followed by anything else causes an error. However, if
is ignored. This gives a way of passing an empty line as data, since a real the very last character in the line is a backslash (and there is no modifier
empty line terminates the data input. list), it is ignored. This gives a way of passing an empty line as data, since
a real empty line terminates the data input.
. .
. .
.SH "PATTERN MODIFIERS" .SH "PATTERN MODIFIERS"
@ -594,14 +610,17 @@ below. All other modifiers cause an error.
.rs .rs
.sp .sp
The \fB/stackguard\fP modifier is used to test the use of The \fB/stackguard\fP modifier is used to test the use of
\fBpcre2_stack_guard\fP. It must be followed by '0' or '1', specifying the \fBpcre2_set_compile_recursion_guard()\fP, a function that is provided to
return code to be given from an external function that is passed to PCRE2 and enable stack availability to be checked during compilation (see the
used for stack checking during compilation (see the
.\" HREF .\" HREF
\fBpcre2api\fP \fBpcre2api\fP
.\" .\"
documentation for details). FIXME: this needs doing properly once the test is documentation for details). If the number specified by the modifier is greater
implemented. Mention nested parens limit. than zero, \fBpcre2_set_compile_recursion_guard()\fP is called to set up
a callback from \fBpcre2_compile()\fP to a local function. The argument it is
passed is the current nesting parenthesis depth; if this is greater than the
value given by the modifier, non-zero is returned, causing the compilation to
be aborted.
. .
. .
.SS "Using alternative character tables" .SS "Using alternative character tables"
@ -1210,6 +1229,6 @@ Cambridge CB2 3QH, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 05 July 2014 Last updated: 22 July 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
.fi .fi

247
perltest.pl Executable file
View File

@ -0,0 +1,247 @@
#! /usr/bin/env perl
# Program for testing regular expressions with perl to check that PCRE2 handles
# them the same. This version needs to have "use utf8" at the start for running
# the UTF-8 tests, but *not* for the other tests. The only way I've found for
# doing this is to cat this line in explicitly in the RunPerlTest script. I've
# also used this method to supply "require Encode" for the UTF-8 tests, so that
# the main test will still run where Encode is not installed.
#use utf8;
#require Encode;
# Turn a string into a string of printing characters.
#
# Argument: $_[0] is the string to be rendered.
# Reads:    the global $utf8 flag, which is set per pattern by the main loop.
# Returns:  a copy of the string in which every character outside the
#           printable ASCII range (codes 32..126) is replaced by a hex escape:
#           \x{hh...} when $utf8 is set, \xhh otherwise.
#
# All working variables are lexical so that a call does not clobber any
# package globals belonging to the caller.
sub pchars {
my $text = "";
if ($utf8)
  {
  # Work on code points so that multi-byte characters yield a single escape.
  foreach my $code (unpack('U*', $_[0]))
    {
    $text .= ($code >= 32 && $code < 127)?
      chr($code) : sprintf("\\x{%02x}", $code);
    }
  }
else
  {
  # Work on the individual characters of the string as Perl sees them.
  foreach my $char (split(//, $_[0]))
    {
    my $code = ord $char;
    $text .= ($code >= 32 && $code < 127)?
      $char : sprintf("\\x%02x", $code);
    }
  }
return $text;
}
# Read lines from named file or stdin and write to named file or stdout; lines
# consist of a regular expression, in delimiters and optionally followed by
# options, followed by a set of test data, terminated by an empty line.
# Sort out the input and output files. The first argument, if present, names
# the input file; the second, if present, names the output file. Otherwise
# STDIN and STDOUT are used. The handles are referred to by name through
# $infile and $outfile for the rest of the script.
#
# The three-argument form of open() is used so that the file name is never
# interpreted as part of the open mode (for example a name that starts with
# ">" or "|", or that has leading or trailing white space).
if (@ARGV > 0)
  {
  open(INFILE, "<", $ARGV[0]) || die "Failed to open $ARGV[0]\n";
  $infile = "INFILE";
  }
else { $infile = "STDIN"; }
if (@ARGV > 1)
  {
  open(OUTFILE, ">", $ARGV[1]) || die "Failed to open $ARGV[1]\n";
  $outfile = "OUTFILE";
  }
else { $outfile = "STDOUT"; }
# Identify the Perl version at the top of the output.
printf($outfile "Perl $] Regular Expressions\n\n");
# Main loop. Each iteration reads one pattern (which may span several lines),
# strips the pcre2test-only modifiers from it, checks that Perl can compile
# it, and then applies it to each following data line up to the next empty
# line. Input is echoed to the output only when reading from a file. Echoing
# and error reporting use print rather than printf so that a "%" in a pattern,
# a data line, or an error message is copied literally instead of being
# treated as a format directive.
NEXT_RE:
for (;;)
  {
  printf " re> " if $infile eq "STDIN";
  last if ! ($_ = <$infile>);
  print $outfile $_ if $infile ne "STDIN";
  next if ($_ =~ /^\s*$/ || $_ =~ /^#/);

  # Keep appending lines until the closing delimiter has been seen.
  $pattern = $_;
  while ($pattern !~ /^\s*(.).*\1/s)
    {
    printf " > " if $infile eq "STDIN";
    last if ! ($_ = <$infile>);
    print $outfile $_ if $infile ne "STDIN";
    $pattern .= $_;
    }
  chomp($pattern);
  $pattern =~ s/\s+$//;

  # Split the pattern from the modifiers and adjust them as necessary.
  $pattern =~ /^\s*((.).*\2)(.*)$/s;
  $pat = $1;
  $mod = $3;

  # "allaftertext" is used by pcretest to print remainders after captures. It
  # must be removed before "aftertext" is looked for; otherwise the shorter
  # name matches inside the longer one, leaving "all" behind as bogus Perl
  # modifiers and wrongly turning on $showrest.
  $mod =~ s/allaftertext,?//;

  # The private "aftertext" modifier means "print $' afterwards".
  $showrest = ($mod =~ s/aftertext,?//);

  # Detect utf
  $utf8 = $mod =~ s/utf,?//;

  # Remove "dupnames".
  $mod =~ s/dupnames,?//;

  # Remove "mark" (asks pcre2test to check MARK data)
  $mod =~ s/mark,?//;

  # "ucp" asks pcre2test to set PCRE_UCP; change this to /u for Perl.
  # NOTE(review): the substitution below acts on a single-letter "W" modifier,
  # not on the word "ucp" - confirm which spelling the test files use.
  $mod =~ s/W(?=[a-zA-Z]*$)/u/;

  # Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2
  # optimizations)
  $mod =~ s/no_auto_possess,?//;
  $mod =~ s/no_start_optimize,?//;

  # Add back retained modifiers and check that the pattern is valid.
  $mod =~ s/,//g;
  $pattern = "$pat$mod";
  eval "\$_ =~ ${pattern}";
  if ($@)
    {
    print $outfile "Error: $@";

    # When reading from a file, skip the data lines that belong to the bad
    # pattern. The handle names are strings, so they must be compared with
    # "ne"; a numeric "!=" treats both names as zero and is never true.
    if ($infile ne "STDIN")
      {
      for (;;)
        {
        last if ! ($_ = <$infile>);
        last if $_ =~ /^\s*$/;
        }
      }
    next NEXT_RE;
    }

  # If the /g modifier is present, we want to put a loop round the matching;
  # otherwise just a single "if".
  $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";

  # If the pattern is actually the null string, Perl uses the most recently
  # executed (and successfully compiled) regex instead. This is a nasty trap
  # for the unwary! The PCRE2 test suite does contain null strings in places -
  # if they are allowed through here all sorts of weird and unexpected effects
  # happen. To avoid this, we replace such patterns with a non-null pattern
  # that has the same effect.
  $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);

  # Read data lines and test them
  for (;;)
    {
    printf "data> " if $infile eq "STDIN";
    last NEXT_RE if ! ($_ = <$infile>);
    chomp;
    print $outfile "$_\n" if $infile ne "STDIN";
    s/\s+$//;     # Remove trailing space
    s/^\s+//;     # Remove leading space
    s/\\Y//g;     # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
    last if ($_ eq "");

    # Evaluate the line as a double-quoted string to get escapes processed.
    # This executes whatever the test file contains, so only trusted test
    # input should be fed to this script.
    $x = eval "\"$_\"";

    # Empty array for holding results, ensure $REGERROR and $REGMARK are
    # unset, then do the matching. On every successful match the generated
    # code pushes 18 values onto @subs: $&, $1 to $16, and finally $'.
    @subs = ();
    $pushes = "";
    foreach my $var ('&', 1 .. 16, "'") { $pushes .= "push \@subs,\$$var;"; }
    $pushes .= " }";

    undef $REGERROR;
    undef $REGMARK;
    eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;

    if ($@)
      {
      print $outfile "Error: $@\n";
      next NEXT_RE;
      }
    elsif (scalar(@subs) == 0)
      {
      printf $outfile "No match";
      # A value of exactly "1" is the nameless "true" setting rather than a
      # mark name. The comparison is done on strings so that a name such as
      # "1st" is not mistaken for it.
      if (defined $REGERROR && $REGERROR ne "1")
        { printf $outfile (", mark = %s", &pchars($REGERROR)); }
      printf $outfile "\n";
      }
    else
      {
      # Each match occupies 18 consecutive slots in @subs.
      while (scalar(@subs) != 0)
        {
        printf $outfile (" 0: %s\n", &pchars($subs[0]));
        printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
        $last_printed = 0;
        for ($i = 1; $i <= 16; $i++)
          {
          if (defined $subs[$i])
            {
            # Show any unset groups that lie below this set one.
            while ($last_printed++ < $i-1)
              { printf $outfile ("%2d: <unset>\n", $last_printed); }
            printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
            $last_printed = $i;
            }
          }
        splice(@subs, 0, 18);
        }

      # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
      # set and the input pattern was a UTF-8 string. We can, however, force
      # it to be so marked. As above, "1" is compared as a string.
      if (defined $REGMARK && $REGMARK ne "1")
        {
        $xx = $REGMARK;
        $xx = Encode::decode_utf8($xx) if $utf8;
        printf $outfile ("MK: %s\n", &pchars($xx));
        }
      }
    }
  }
# printf $outfile "\n";
# End

View File

@ -561,7 +561,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77 }; ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such /* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -1703,10 +1703,10 @@ else
ptr += 4; ptr += 4;
if (utf) if (utf)
{ {
if (c > 0x10ffffU) *errorcodeptr = ERR76; if (c > 0x10ffffU) *errorcodeptr = ERR77;
else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
} }
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR76; else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
} }
break; break;
@ -1815,12 +1815,11 @@ else
recommended to avoid the ambiguities in the old syntax. recommended to avoid the ambiguities in the old syntax.
Outside a character class, the digits are read as a decimal number. If the Outside a character class, the digits are read as a decimal number. If the
number is less than 8 (used to be 10), or if there are that many previous number is less than 10, or if there are that many previous extracting left
extracting left brackets, then it is a back reference. Otherwise, up to brackets, it is a back reference. Otherwise, up to three octal digits are
three octal digits are read to form an escaped byte. Thus \123 is likely to read to form an escaped byte. Thus \123 is likely to be octal 123 (cf
be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If \0123, which is octal 012 followed by the literal 3). If the octal value is
the octal value is greater than 377, the least significant 8 bits are greater than 377, the least significant 8 bits are taken.
taken. \8 and \9 are treated as the literal characters 8 and 9.
Inside a character class, \ followed by a digit is always either a literal Inside a character class, \ followed by a digit is always either a literal
8 or 9 or an octal number. */ 8 or 9 or an octal number. */
@ -1832,7 +1831,7 @@ else
{ {
oldptr = ptr; oldptr = ptr;
/* The integer range is limited by the machine's int representation. */ /* The integer range is limited by the machine's int representation. */
s = (int)(c -CHAR_0); s = (int)(c - CHAR_0);
overflow = FALSE; overflow = FALSE;
while (IS_DIGIT(ptr[1])) while (IS_DIGIT(ptr[1]))
{ {
@ -1849,7 +1848,7 @@ else
*errorcodeptr = ERR61; *errorcodeptr = ERR61;
break; break;
} }
if (s < 8 || s <= cb->bracount) /* Check for back reference */ if (s < 10 || s <= cb->bracount) /* Check for back reference */
{ {
escape = -s; escape = -s;
break; break;
@ -1886,7 +1885,7 @@ else
case CHAR_o: case CHAR_o:
if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else
if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR77; else if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else
{ {
ptr += 2; ptr += 2;
c = 0; c = 0;
@ -1947,7 +1946,7 @@ else
ptr += 2; ptr += 2;
if (*ptr == CHAR_RIGHT_CURLY_BRACKET) if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
{ {
*errorcodeptr = ERR77; *errorcodeptr = ERR78;
break; break;
} }
c = 0; c = 0;
@ -1955,12 +1954,12 @@ else
while ((cc = XDIGIT(*ptr)) != 0xff) while ((cc = XDIGIT(*ptr)) != 0xff)
{ {
ptr++;
if (c == 0 && cc == 0) continue; /* Leading zeroes */ if (c == 0 && cc == 0) continue; /* Leading zeroes */
#if PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x10000000l) { overflow = TRUE; break; } if (c >= 0x10000000l) { overflow = TRUE; break; }
#endif #endif
c = (c << 4) | cc; c = (c << 4) | cc;
ptr++;
if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR)) if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR))
{ {
overflow = TRUE; overflow = TRUE;
@ -2002,9 +2001,9 @@ else
break; break;
/* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
An error is given if the byte following \c is not an ASCII character. This An error is given if the byte following \c is not a printable ASCII
coding is ASCII-specific, but then the whole concept of \cx is character. This coding is ASCII-specific, but then the whole concept of \cx
ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ is ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
case CHAR_c: case CHAR_c:
c = *(++ptr); c = *(++ptr);
@ -2014,7 +2013,7 @@ else
break; break;
} }
#ifndef EBCDIC /* ASCII/UTF-8 coding */ #ifndef EBCDIC /* ASCII/UTF-8 coding */
if (c > 127) /* Excludes all non-ASCII in either mode */ if (c < 32 || c > 126) /* Excludes all non-printable ASCII */
{ {
*errorcodeptr = ERR68; *errorcodeptr = ERR68;
break; break;
@ -3820,7 +3819,7 @@ for (;; ptr++)
{ {
ptr += 2; ptr += 2;
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
{ ptr += 2; goto CONTINUE_CLASS; } { ptr += 2; continue; }
inescq = TRUE; inescq = TRUE;
break; break;
} }
@ -4981,7 +4980,7 @@ for (;; ptr++)
arglen = (int)(ptr - arg); arglen = (int)(ptr - arg);
if ((unsigned int)arglen > MAX_MARK) if ((unsigned int)arglen > MAX_MARK)
{ {
*errorcodeptr = ERR75; *errorcodeptr = ERR76;
goto FAILED; goto FAILED;
} }
} }
@ -6548,10 +6547,9 @@ Returns: TRUE on success
static BOOL static BOOL
compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr, compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr,
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipunits, int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipunits,
int cond_depth, int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
uint32_t *firstcuptr, int32_t *firstcuflagsptr, uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
uint32_t *reqcuptr, int32_t *reqcuflagsptr, compile_block *cb, size_t *lengthptr)
branch_chain *bcptr, compile_block *cb, size_t *lengthptr)
{ {
PCRE2_SPTR ptr = *ptrptr; PCRE2_SPTR ptr = *ptrptr;
PCRE2_UCHAR *code = *codeptr; PCRE2_UCHAR *code = *codeptr;
@ -6569,15 +6567,13 @@ unsigned int orig_bracount;
unsigned int max_bracount; unsigned int max_bracount;
branch_chain bc; branch_chain bc;
#ifdef FIXME
/* If set, call the external function that checks for stack availability. */ /* If set, call the external function that checks for stack availability. */
if (ccontext->stack_guard != NULL && ccontext->stack_guard(0)) if (cb->cx->stack_guard != NULL && cb->cx->stack_guard(cb->parens_depth))
{ {
*errorcodeptr= ERR33; *errorcodeptr= ERR33;
return FALSE; return FALSE;
} }
#endif
/* Miscellaneous initialization */ /* Miscellaneous initialization */
@ -7434,7 +7430,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
c = c*10 + ptr[pp++] - CHAR_0; c = c*10 + ptr[pp++] - CHAR_0;
} }
if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) goto END_PSO; if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR60;
goto HAD_ERROR;
}
if (p->type == PSO_LIMM) limit_match = c; if (p->type == PSO_LIMM) limit_match = c;
else limit_recursion = c; else limit_recursion = c;
skipatstart += pp - skipatstart; skipatstart += pp - skipatstart;
@ -7443,12 +7443,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
break; /* Out of the table scan loop */ break; /* Out of the table scan loop */
} }
} }
if (i > sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */ if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
} }
/* End of pattern-start options; advance to start of real regex. */ /* End of pattern-start options; advance to start of real regex. */
END_PSO:
ptr += skipatstart; ptr += skipatstart;
/* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */ /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
@ -7477,6 +7476,15 @@ if (utf)
goto HAD_ERROR; goto HAD_ERROR;
} }
/* Check UCP lockout. */
if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
(PCRE2_UCP|PCRE2_NEVER_UCP))
{
errorcode = ERR75;
goto HAD_ERROR;
}
/* Process the BSR setting. */ /* Process the BSR setting. */
if (bsr == 0) bsr = ccontext->bsr_convention; if (bsr == 0) bsr = ccontext->bsr_convention;

View File

@ -148,15 +148,16 @@ static const char compile_error_texts[] =
"different names for subpatterns of the same number are not allowed\0" "different names for subpatterns of the same number are not allowed\0"
"(*MARK) must have an argument\0" "(*MARK) must have an argument\0"
"non-hex character in \\x{} (closing brace missing?)\0" "non-hex character in \\x{} (closing brace missing?)\0"
"\\c must be followed by an ASCII character\0" "\\c must be followed by a printable ASCII character\0"
"\\k is not followed by a braced, angle-bracketed, or quoted name\0" "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */ /* 70 */
"internal error: unknown opcode in find_fixedlength()\0" "internal error: unknown opcode in find_fixedlength()\0"
"\\N is not supported in a class\0" "\\N is not supported in a class\0"
"too many forward references\0" "too many forward references\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"using (*UTF) is disabled by the application\0" "using UTF is disabled by the application\0"
/* 75 */ /* 75 */
"using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0" "character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0" "digits missing in \\x{} or \\o{}\0"
@ -223,7 +224,7 @@ static const char match_error_texts[] =
"JIT stack limit reached\0" "JIT stack limit reached\0"
"match limit exceeded\0" "match limit exceeded\0"
"no more memory\0" "no more memory\0"
"unknown substring\0" "unknown or unset substring\0"
/* 50 */ /* 50 */
"NULL argument passed\0" "NULL argument passed\0"
"nested recursion at the same subject position\0" "nested recursion at the same subject position\0"

View File

@ -6782,6 +6782,12 @@ ENDLOOP:
release_match_heapframes(&frame_zero, mb); release_match_heapframes(&frame_zero, mb);
#endif #endif
/* Fill in fields that are always returned in the match data. */
match_data->code = re;
match_data->subject = subject;
match_data->mark = mb->mark;
/* Handle a fully successful match. */ /* Handle a fully successful match. */
if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
@ -6842,25 +6848,26 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
match_data->ovector[1] = mb->end_match_ptr - mb->start_subject; match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
} }
/* Fill in the remaining fields that are returned in the match data. */ /* Set the remaining returned values */
match_data->code = re;
match_data->subject = subject;
match_data->leftchar = mb->start_used_ptr - subject; match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = 0; /* FIXME */ match_data->rightchar = 0; /* FIXME */
match_data->startchar = start_match - subject; match_data->startchar = start_match - subject;
match_data->mark = mb->mark;
return match_data->rc; return match_data->rc;
} }
/* Control gets here if there has been a partial match, an error, or if the /* Control gets here if there has been a partial match, an error, or if the
overall match attempt has failed at all permitted starting positions. For overall match attempt has failed at all permitted starting positions. Any mark
anything other than nomatch or partial match, just return the code. */ data is in the nomatch_mark field. */
match_data->mark = mb->nomatch_mark;
/* For anything other than nomatch or partial match, just return the code. */
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL)
match_data->rc = rc; match_data->rc = rc;
/* Handle a partial match. */ /* Else handle a partial match. */
else if (match_partial != NULL) else if (match_partial != NULL)
{ {
@ -6870,16 +6877,16 @@ else if (match_partial != NULL)
match_data->ovector[1] = end_subject - subject; match_data->ovector[1] = end_subject - subject;
} }
match_data->leftchar = start_partial - subject; match_data->leftchar = start_partial - subject;
match_data->rightchar = 0; /* FIXME */
match_data->startchar = match_partial - subject;
match_data->rc = PCRE2_ERROR_PARTIAL; match_data->rc = PCRE2_ERROR_PARTIAL;
} }
/* This is the classic nomatch case. */ /* Else this is the classic nomatch case. */
else else match_data->rc = PCRE2_ERROR_NOMATCH;
{
match_data->rc = PCRE2_ERROR_NOMATCH; /* Free any temporary offsets. */
match_data->mark = mb->nomatch_mark;
}
if (using_temporary_offsets) if (using_temporary_offsets)
mb->memctl.free(mb->ovector, mb->memctl.memory_data); mb->memctl.free(mb->ovector, mb->memctl.memory_data);

View File

@ -119,6 +119,7 @@ size_t left, right;
size_t p = 0; size_t p = 0;
PCRE2_SPTR subject = match_data->subject; PCRE2_SPTR subject = match_data->subject;
if (stringnumber >= match_data->oveccount || if (stringnumber >= match_data->oveccount ||
stringnumber > match_data->code->top_bracket ||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET) (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
right = match_data->ovector[stringnumber*2+1]; right = match_data->ovector[stringnumber*2+1];
@ -203,6 +204,7 @@ PCRE2_UCHAR *yield;
PCRE2_SPTR subject = match_data->subject; PCRE2_SPTR subject = match_data->subject;
if (stringnumber >= match_data->oveccount || if (stringnumber >= match_data->oveccount ||
stringnumber > match_data->code->top_bracket ||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET) (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
right = match_data->ovector[stringnumber*2+1]; right = match_data->ovector[stringnumber*2+1];
@ -293,6 +295,7 @@ pcre2_substring_length_bynumber(pcre2_match_data *match_data,
int stringnumber) int stringnumber)
{ {
if (stringnumber >= match_data->oveccount || if (stringnumber >= match_data->oveccount ||
stringnumber > match_data->code->top_bracket ||
match_data->ovector[stringnumber*2] == PCRE2_UNSET) match_data->ovector[stringnumber*2] == PCRE2_UNSET)
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
return match_data->ovector[stringnumber*2 + 1] - return match_data->ovector[stringnumber*2 + 1] -

View File

@ -46,7 +46,6 @@ POSSIBILITY OF SUCH DAMAGE.
. save code and #load . save code and #load
. JIT - compile, time, verify . JIT - compile, time, verify
. memory handling testing . memory handling testing
. stackguard testing
*/ */
@ -435,7 +434,7 @@ static modstruct modlist[] = {
{ "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) }, { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
{ "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
{ "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
{ "dupnames", MOD_PAT, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
{ "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) }, { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
{ "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) }, { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
{ "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) }, { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
@ -612,6 +611,7 @@ clock_t total_compile_time = 0;
clock_t total_match_time = 0; clock_t total_match_time = 0;
static uint32_t dfa_matched; static uint32_t dfa_matched;
static uint32_t forbid_utf = 0;
static uint32_t max_oveccount; static uint32_t max_oveccount;
static uint32_t callout_count; static uint32_t callout_count;
@ -831,6 +831,14 @@ are supported. */
else \ else \
pcre2_set_character_tables_32(G(a,32),b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_compile_recursion_guard_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
pcre2_set_compile_recursion_guard_16(G(a,16),b); \
else \
pcre2_set_compile_recursion_guard_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) \ #define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \ if (test_mode == PCRE8_MODE) \
pcre2_set_match_limit_8(G(a,8),b); \ pcre2_set_match_limit_8(G(a,8),b); \
@ -1102,6 +1110,12 @@ the three different cases. */
else \ else \
G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b) G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b); \
else \
G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) \ #define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \ G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
@ -1245,8 +1259,10 @@ the three different cases. */
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d) #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a) #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SET_CALLOUT(a,b,c) \ #define PCRE2_SET_CALLOUT(a,b,c) \
pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c); pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b) #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
pcre2_set_compile_recursion_guard_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
@ -1304,12 +1320,14 @@ the three different cases. */
#define PCRE2_SET_CALLOUT(a,b,c) \ #define PCRE2_SET_CALLOUT(a,b,c) \
pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *))b,c); pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *))b,c);
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b) #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
pcre2_set_compile_recursion_guard_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a) #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d) \ #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d) \
a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d) a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d)
@ -1361,10 +1379,12 @@ the three different cases. */
#define PCRE2_SET_CALLOUT(a,b,c) \ #define PCRE2_SET_CALLOUT(a,b,c) \
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *))b,c); pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *))b,c);
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b) #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
pcre2_set_compile_recursion_guard_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e); a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e); a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a) #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
@ -1766,6 +1786,25 @@ free(block);
#endif /* NO_RECURSE */ #endif /* NO_RECURSE */
/*************************************************
* Callback function for stack guard *
*************************************************/
/* This is set up to be called from pcre2_compile() when the stackguard=n
modifier sets a value greater than zero. The test we do is whether the
parenthesis nesting depth is greater than the value set by the modifier.
Argument: the current parenthesis nesting depth
Returns: non-zero to kill the compilation
*/
static int
stack_guard(uint32_t depth)
{
/* Report "stop" (non-zero) only once the nesting depth has gone beyond the
limit given by the stackguard modifier; otherwise let compilation continue. */
if (depth > pat_patctl.stackguard_test) return 1;
return 0;
}
/************************************************* /*************************************************
* Convert UTF-8 character to code point * * Convert UTF-8 character to code point *
*************************************************/ *************************************************/
@ -2031,16 +2070,16 @@ return i + 1;
#ifdef SUPPORT_PCRE16 #ifdef SUPPORT_PCRE16
/************************************************* /*************************************************
* Convert a string to 16-bit * * Convert pattern to 16-bit *
*************************************************/ *************************************************/
/* The input is always interpreted as a string of UTF-8 bytes. If all the input /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
bytes are ASCII, the space needed for a 16-bit string is exactly double the all the input bytes are ASCII, the space needed for a 16-bit string is exactly
8-bit size. Otherwise, the size needed for a 16-bit string is no more than double the 8-bit size. Otherwise, the size needed for a 16-bit string is no
double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
result is always left in pbuffer16. Impose a minimum size to save repeated UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
re-sizing. repeated re-sizing.
Note that this function does not object to surrogate values. This is Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid, deliberate; it makes it possible to construct UTF-16 strings that are invalid,
@ -2074,9 +2113,13 @@ if (pbuffer16_size < 2*len + 2)
exit(1); exit(1);
} }
} }
pp = pbuffer16;
while (len > 0) pp = pbuffer16;
if (!utf)
{
while (len-- > 0) *pp++ = *p++;
}
else while (len > 0)
{ {
uint32_t c; uint32_t c;
int chlen = utf82ord(p, &c); int chlen = utf82ord(p, &c);
@ -2102,15 +2145,15 @@ return pp - pbuffer16;
#ifdef SUPPORT_PCRE32 #ifdef SUPPORT_PCRE32
/************************************************* /*************************************************
* Convert a string to 32-bit * * Convert pattern to 32-bit *
*************************************************/ *************************************************/
/* The input is always interpreted as a string of UTF-8 bytes. If all the input /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
bytes are ASCII, the space needed for a 32-bit string is exactly four times the all the input bytes are ASCII, the space needed for a 32-bit string is exactly
8-bit size. Otherwise, the size needed for a 32-bit string is no more than four four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no
times, because the number of characters must be less than the number of bytes. more than four times, because the number of characters must be less than the
The result is always left in pbuffer32. Impose a minimum size to save repeated number of bytes. The result is always left in pbuffer32. Impose a minimum size
re-sizing. to save repeated re-sizing.
Note that this function does not object to surrogate values. This is Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-32 strings that are invalid, deliberate; it makes it possible to construct UTF-32 strings that are invalid,
@ -2143,9 +2186,13 @@ if (pbuffer32_size < 4*len + 4)
exit(1); exit(1);
} }
} }
pp = pbuffer32;
while (len > 0) pp = pbuffer32;
if (!utf)
{
while (len-- > 0) *pp++ = *p++;
}
else while (len > 0)
{ {
uint32_t c; uint32_t c;
int chlen = utf82ord(p, &c); int chlen = utf82ord(p, &c);
@ -3021,8 +3068,25 @@ if ((pat_patctl.control & CTL_INFO) != 0)
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options); pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options); pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
cluttering up the verification output of non-UTF test files. */
if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
{
compile_options &= ~PCRE2_NEVER_UTF;
overall_options &= ~PCRE2_NEVER_UTF;
}
if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
{
compile_options &= ~PCRE2_NEVER_UCP;
overall_options &= ~PCRE2_NEVER_UCP;
}
if ((compile_options|overall_options) == 0) if ((compile_options|overall_options) == 0)
fprintf(outfile, "No options\n"); fprintf(outfile, "No options\n");
else if (compile_options == overall_options)
show_compile_options(compile_options, "Options:", "\n");
else else
{ {
show_compile_options(compile_options, "Compile options:", "\n"); show_compile_options(compile_options, "Compile options:", "\n");
@ -3035,26 +3099,26 @@ if ((pat_patctl.control & CTL_INFO) != 0)
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
"any Unicode newline" : "CR, LF, or CRLF"); "any Unicode newline" : "CR, LF, or CRLF");
switch (newline_convention) if (newline_convention != NEWLINE_DEFAULT) switch (newline_convention)
{ {
case PCRE2_NEWLINE_CR: case PCRE2_NEWLINE_CR:
fprintf(outfile, "Newline is CR\n"); fprintf(outfile, "Forced newline is CR\n");
break; break;
case PCRE2_NEWLINE_LF: case PCRE2_NEWLINE_LF:
fprintf(outfile, "Newline is LF\n"); fprintf(outfile, "Forced newline is LF\n");
break; break;
case PCRE2_NEWLINE_CRLF: case PCRE2_NEWLINE_CRLF:
fprintf(outfile, "Newline is CRLF\n"); fprintf(outfile, "Forced newline is CRLF\n");
break; break;
case PCRE2_NEWLINE_ANYCRLF: case PCRE2_NEWLINE_ANYCRLF:
fprintf(outfile, "Newline is CR, LF, or CRLF\n"); fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
break; break;
case PCRE2_NEWLINE_ANY: case PCRE2_NEWLINE_ANY:
fprintf(outfile, "Newline is any Unicode newline\n"); fprintf(outfile, "Forced newline is any Unicode newline\n");
break; break;
default: default:
@ -3063,7 +3127,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (first_ctype == 2) if (first_ctype == 2)
{ {
fprintf(outfile, "First char at start or follows newline\n"); fprintf(outfile, "First code unit at start or follows newline\n");
} }
else if (first_ctype == 1) else if (first_ctype == 1)
{ {
@ -3079,35 +3143,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
fprintf(outfile, "%s\n", caseless); fprintf(outfile, "%s\n", caseless);
} }
} }
else else if (start_bits != NULL)
{
fprintf(outfile, "No first code unit\n");
}
if (last_ctype == 0)
{
fprintf(outfile, "No last code unit\n");
}
else
{
const char *caseless =
((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
"" : " (caseless)";
if (PRINTOK(last_cunit))
fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
else
{
fprintf(outfile, "Last code unit = ");
pchar(last_cunit, FALSE, outfile);
fprintf(outfile, "%s\n", caseless);
}
}
fprintf(outfile, "Subject length lower bound = %d\n", minlength);
if (start_bits == NULL)
fprintf(outfile, "No starting code unit list\n");
else
{ {
int i; int i;
int c = 24; int c = 24;
@ -3135,6 +3171,31 @@ if ((pat_patctl.control & CTL_INFO) != 0)
} }
fprintf(outfile, "\n"); fprintf(outfile, "\n");
} }
else
{
fprintf(outfile, "No first code unit\n");
}
if (last_ctype == 0)
{
fprintf(outfile, "No last code unit\n");
}
else
{
const char *caseless =
((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
"" : " (caseless)";
if (PRINTOK(last_cunit))
fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
else
{
fprintf(outfile, "Last code unit = ");
pchar(last_cunit, FALSE, outfile);
fprintf(outfile, "%s\n", caseless);
}
}
fprintf(outfile, "Subject length lower bound = %d\n", minlength);
/* FIXME: tidy this up */ /* FIXME: tidy this up */
@ -3183,7 +3244,11 @@ if (restrict_for_perl_test)
return PR_ABEND; return PR_ABEND;
} }
if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8])) if (strncmp((char *)buffer, "#forbid_utf", 11) == 0 && isspace(buffer[11]))
{
forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
}
else if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
{ {
(void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL); (void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL);
} }
@ -3491,6 +3556,13 @@ else switch (pat_patctl.tables_id)
PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables); PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
/* Set up for the stackguard test. */
if (pat_patctl.stackguard_test != 0)
{
PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard);
}
/* Handle compiling via the POSIX interface, which doesn't support the /* Handle compiling via the POSIX interface, which doesn't support the
timing, showing, or debugging options, nor the ability to pass over timing, showing, or debugging options, nor the ability to pass over
local character tables. Neither does it have 16-bit or 32-bit support. */ local character tables. Neither does it have 16-bit or 32-bit support. */
@ -3604,7 +3676,7 @@ if (timeit > 0)
for (i = 0; i < timeit; i++) for (i = 0; i < timeit; i++)
{ {
PCRE2_COMPILE(compiled_code, pbuffer, patlen, PCRE2_COMPILE(compiled_code, pbuffer, patlen,
pat_patctl.options, &errorcode, &erroroffset, pat_context); pat_patctl.options|forbid_utf, &errorcode, &erroroffset, pat_context);
if (TEST(compiled_code, !=, NULL)) if (TEST(compiled_code, !=, NULL))
{ SUB1(pcre2_code_free, compiled_code); } { SUB1(pcre2_code_free, compiled_code); }
} }
@ -3618,8 +3690,8 @@ if (timeit > 0)
/* A final compile that is used "for real". */ /* A final compile that is used "for real". */
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options, &errorcode, PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
&erroroffset, pat_context); &errorcode, &erroroffset, pat_context);
/* Compilation failed; go back for another re, skipping to blank line /* Compilation failed; go back for another re, skipping to blank line
if non-interactive. */ if non-interactive. */
@ -3782,14 +3854,12 @@ for (;;)
min = mid; min = mid;
mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2; mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
} }
else if (capcount >= 0 || else if (capcount >= 0 ||
capcount == PCRE2_ERROR_NOMATCH || capcount == PCRE2_ERROR_NOMATCH ||
capcount == PCRE2_ERROR_PARTIAL) capcount == PCRE2_ERROR_PARTIAL)
{ {
if (mid == min + 1) if (mid == min + 1)
{ {
if (capcount != PCRE2_ERROR_NOMATCH)
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid); fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
break; break;
} }
@ -4184,9 +4254,12 @@ while ((c = *p++) != 0)
continue; continue;
default: default:
if (isalnum(c))
{
fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c); fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
return PR_OK; return PR_OK;
} }
}
/* We now have a character value in c that may be greater than 255. /* We now have a character value in c that may be greater than 255.
In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
@ -4608,7 +4681,12 @@ for (gmatched = 0;; gmatched++)
PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer,
sizeof(copybuffer)/code_unit_size); sizeof(copybuffer)/code_unit_size);
if (rc < 0) if (rc < 0)
fprintf(outfile, "copy substring %d failed %d\n", n, rc); {
fprintf(outfile, "copy substring %d failed (%d): ", n, rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
}
else else
{ {
fprintf(outfile, "%2dC ", n); fprintf(outfile, "%2dC ", n);
@ -4641,7 +4719,10 @@ for (gmatched = 0;; gmatched++)
copybuffer, sizeof(copybuffer)/code_unit_size); copybuffer, sizeof(copybuffer)/code_unit_size);
if (rc < 0) if (rc < 0)
{ {
fprintf(outfile, "copy substring '%s' failed %d\n", nptr, rc); fprintf(outfile, "copy substring '%s' failed (%d): ", nptr, rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
} }
else else
{ {
@ -4661,7 +4742,12 @@ for (gmatched = 0;; gmatched++)
uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]); uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer); PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer);
if (rc < 0) if (rc < 0)
fprintf(outfile, "get substring %d failed %d\n", n, rc); {
fprintf(outfile, "get substring %d failed (%d): ", n, rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
}
else else
{ {
fprintf(outfile, "%2dG ", n); fprintf(outfile, "%2dG ", n);
@ -4694,7 +4780,10 @@ for (gmatched = 0;; gmatched++)
PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer); PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer);
if (rc < 0) if (rc < 0)
{ {
fprintf(outfile, "get substring '%s' failed %d\n", nptr, rc); fprintf(outfile, "get substring '%s' failed (%d): ", nptr, rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
} }
else else
{ {
@ -4715,7 +4804,12 @@ for (gmatched = 0;; gmatched++)
size_t *lengths; size_t *lengths;
PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths); PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
if (rc < 0) if (rc < 0)
fprintf(outfile, "get substring list failed %d\n", rc); {
fprintf(outfile, "get substring list failed (%d): ", rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
}
else else
{ {
for (i = 0; i < capcount; i++) for (i = 0; i < capcount; i++)
@ -4737,7 +4831,6 @@ for (gmatched = 0;; gmatched++)
else if (capcount == PCRE2_ERROR_PARTIAL) else if (capcount == PCRE2_ERROR_PARTIAL)
{ {
PCRE2_OFFSET leftchar = FLD(match_data, leftchar); PCRE2_OFFSET leftchar = FLD(match_data, leftchar);
fprintf(outfile, "Partial match"); fprintf(outfile, "Partial match");
if (leftchar != FLD(match_data, startchar)) if (leftchar != FLD(match_data, startchar))
fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar)); fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar));
@ -4880,8 +4973,8 @@ for (gmatched = 0;; gmatched++)
else else
{ {
pp += end_offset * code_unit_size; pp += end_offset * code_unit_size;
len -= end_offset; len -= end_offset * code_unit_size;
ulen -= end_offset *code_unit_size; ulen -= end_offset;
} }
} }
} /* End of global loop */ } /* End of global loop */
@ -4894,7 +4987,7 @@ return PR_OK;
/************************************************* /*************************************************
* Print PCRE version * * Print PCRE2 version *
*************************************************/ *************************************************/
/* The version string was read into 'version' at the start of execution. */ /* The version string was read into 'version' at the start of execution. */
@ -4903,7 +4996,7 @@ static void
print_version(FILE *f) print_version(FILE *f)
{ {
VERSION_TYPE *vp; VERSION_TYPE *vp;
fprintf(f, "PCRE version "); fprintf(f, "PCRE2 version ");
for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp); for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
fprintf(f, "\n"); fprintf(f, "\n");
} }
@ -4976,6 +5069,7 @@ printf(" -d set default pattern control 'debug'\n");
printf(" -dfa set default subject control 'dfa'\n"); printf(" -dfa set default subject control 'dfa'\n");
printf(" -help show usage information\n"); printf(" -help show usage information\n");
printf(" -i set default pattern control 'info'\n"); printf(" -i set default pattern control 'info'\n");
printf(" -jit set default pattern control 'jit'\n");
printf(" -q quiet: do not output PCRE version number at start\n"); printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -pattern <s> set default pattern control fields\n"); printf(" -pattern <s> set default pattern control fields\n");
printf(" -subject <s> set default subject control fields\n"); printf(" -subject <s> set default subject control fields\n");
@ -5261,10 +5355,18 @@ while (argc > 1 && argv[op][0] == '-')
/* Set some common pattern and subject controls */ /* Set some common pattern and subject controls */
else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE; else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG; else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO; else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; else if (strcmp(arg, "-jit") == 0)
{
def_patctl.jit = 7; /* full & partial */
#ifndef SUPPORT_JIT
fprintf(stderr, "** Warning: JIT support is not available: "
"-jit calls dummy functions.\n");
#endif
}
/* Set timing parameters */ /* Set timing parameters */
@ -5503,7 +5605,8 @@ while (notdone)
while (isspace(*p)) p++; while (isspace(*p)) p++;
if (*p != 0) if (*p != 0)
{ {
fprintf(stderr, "** Invalid pattern delimiter '%c'.\n", *buffer); fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
*buffer);
rc = PR_SKIP; rc = PR_SKIP;
} }
} }

5695
testdata/testinput1 vendored Normal file

File diff suppressed because it is too large Load Diff

4034
testdata/testinput2 vendored Normal file

File diff suppressed because it is too large Load Diff

9389
testdata/testoutput1 vendored Normal file

File diff suppressed because it is too large Load Diff

14400
testdata/testoutput2 vendored Normal file

File diff suppressed because it is too large Load Diff