Tests 1 and 2 are converted (but without save/restore).
This commit is contained in:
parent
1701838220
commit
017b6a1624
|
@ -0,0 +1,995 @@
|
|||
#! /bin/sh
|
||||
|
||||
###############################################################################
|
||||
# Run the PCRE2 tests using the pcre2test program. The appropriate tests are
|
||||
# selected, depending on which build-time options were used.
|
||||
#
|
||||
# When JIT support is available, all appropriate tests are run with and without
|
||||
# JIT, unless "nojit" is given on the command line. There are also two tests
|
||||
# for JIT-specific features, one to be run when JIT support is available
|
||||
# (unless "nojit" is specified), and one when it is not.
|
||||
#
|
||||
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
||||
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
||||
# command line.
|
||||
#
|
||||
# As well as "nojit", "-8", "-16", and "-32", arguments for this script are
|
||||
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
|
||||
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
||||
# except test 10. Whatever order the arguments are in, the tests are always run
|
||||
# in numerical order.
|
||||
#
|
||||
# The special argument "3S" runs test 3, stopping if it fails. Test 3 is the
|
||||
# locale test, and failure usually means there's an issue with the locale
|
||||
# rather than a bug in PCRE2, so normally subsequent tests are run. "3S" is
|
||||
# useful when you want to debug or update the test.
|
||||
#
|
||||
# Inappropriate tests are automatically skipped (with a comment to say so): for
|
||||
# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
|
||||
# support is compiled, test 13 is skipped.
|
||||
#
|
||||
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
|
||||
# followed by an argument to run cross-compiled executables under a simulator,
|
||||
# for example:
|
||||
#
|
||||
# RunTest 3 sim "qemu-arm -s 8388608"
|
||||
#
|
||||
# There are two special cases where only one argument is allowed:
|
||||
#
|
||||
# If the first and only argument is "ebcdic", the script runs the special
|
||||
# EBCDIC test that can be useful for checking certain EBCDIC features, even
|
||||
# when run in an ASCII environment.
|
||||
#
|
||||
# If the script is obeyed as "RunTest list", a list of available tests is
|
||||
# output, but none of them are run.
|
||||
###############################################################################
|
||||
|
||||
# Define test titles in variables so that they can be output as a list. Some
|
||||
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
|
||||
|
||||
title1="Test 1: Main functionality (Compatible with Perl >= 5.10)"
|
||||
title2="Test 2: API, errors, internals, and non-Perl stuff"
|
||||
#title3="Test 3: Locale-specific features"
|
||||
#title4A="Test 4: UTF"
|
||||
#title4B=" support (Compatible with Perl >= 5.10)"
|
||||
#title5="Test 5: API, internals, and non-Perl stuff for UTF"
|
||||
#title6="Test 6: Unicode property support (Compatible with Perl >= 5.10)"
|
||||
#title7="Test 7: API, internals, and non-Perl stuff for Unicode property support"
|
||||
#title8="Test 8: DFA matching main functionality"
|
||||
#title9="Test 9: DFA matching with UTF"
|
||||
#title10="Test 10: DFA matching with Unicode properties"
|
||||
#title11="Test 11: Internal offsets and code size tests"
|
||||
#title12="Test 12: JIT-specific features (when JIT is available)"
|
||||
#title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||
#title14="Test 14: Specials for the basic 8-bit library"
|
||||
#title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
||||
#title16="Test 16: Specials for the 8-bit library with Unicode property support"
|
||||
#title17="Test 17: Specials for the basic 16/32-bit library"
|
||||
#title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
|
||||
#title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
|
||||
#title20="Test 20: DFA specials for the basic 16/32-bit library"
|
||||
#title21="Test 21: Reloads for the basic 16/32-bit library"
|
||||
#title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
|
||||
#title23="Test 23: Specials for the 16-bit library"
|
||||
#title24="Test 24: Specials for the 16-bit library with UTF-16 support"
|
||||
#title25="Test 25: Specials for the 32-bit library"
|
||||
#title26="Test 26: Specials for the 32-bit library with UTF-32 support"
|
||||
|
||||
maxtest=2
|
||||
|
||||
# "RunTest list": print the titles of the available tests and exit without
# running any of them. Entries for tests that have not been converted yet are
# kept, commented out, so that they can be re-enabled one by one.
if [ $# -eq 1 ] && [ "$1" = "list" ]; then
  echo "$title1"
  echo "$title2 (not UTF)"
# echo $title3
# echo $title4A $title4B
# echo $title5 support
# echo $title6
# echo $title7
# echo $title8
# echo $title9
# echo $title10
# echo $title11
# echo $title12
# echo $title13
# echo $title14
# echo $title15
# echo $title16
# echo $title17
# echo $title18
# echo $title19
# echo $title20
# echo $title21
# echo $title22
# echo $title23
# echo $title24
# echo $title25
# echo $title26
  exit 0
fi
|
||||
|
||||
# Set up a suitable "diff" command for comparison. Some systems
|
||||
# have a diff that lacks a -u option. Try to deal with this.
|
||||
|
||||
# Choose the comparison command used for every test. Unified diffs are
# preferred; if this system's diff rejects -u, fall back to plain "diff".
# The probe compares /dev/null with itself and all of its output is discarded.
if diff -u /dev/null /dev/null >/dev/null 2>&1; then
  cf="diff -u"
else
  cf="diff"
fi
|
||||
|
||||
# Find the test data
|
||||
|
||||
# Locate the directory holding the test input/output files and remember it in
# $testdata. Candidates, in order: $srcdir/testdata (when $srcdir names an
# existing directory), ./testdata, ../testdata. Give up if none is found.
if [ -n "$srcdir" ] && [ -d "$srcdir" ]; then
  testdata="$srcdir/testdata"
elif [ -d "./testdata" ]; then
  testdata=./testdata
elif [ -d "../testdata" ]; then
  testdata=../testdata
else
  echo "Cannot find the testdata directory"
  exit 1
fi
|
||||
|
||||
|
||||
# ------ Special EBCDIC Test -------
|
||||
|
||||
# "RunTest ebcdic": run only the special EBCDIC test, once with the default
# matcher and once with -dfa, then exit. An exit status of 1 from
# "pcre2test -C ebcdic" is what this script accepts as "EBCDIC is supported".
if [ $# -eq 1 ] && [ "$1" = "ebcdic" ]; then
  ./pcre2test -C ebcdic >/dev/null
  ebcdic=$?
  if [ "$ebcdic" -ne 1 ]; then
    echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
    exit 1
  fi

  for opt in "" "-dfa"; do
    # $opt is deliberately unquoted so that the empty option disappears; the
    # data paths are quoted so that a testdata directory containing spaces
    # still works. $cf is unquoted because it may be "diff -u".
    ./pcre2test -q $opt "$testdata/testinputEBC" >testtry || exit 1
    $cf "$testdata/testoutputEBC" testtry || exit 1
    if [ "$opt" = "-dfa" ]; then
      echo " OK using DFA"
    else
      echo " OK"
    fi
  done

  # This path never reaches the clean-up at the end of the script, so remove
  # the working file here. On failure it is left in place for inspection.
  rm -f testtry
  exit 0
fi
|
||||
|
||||
|
||||
# ------ Normal Tests ------
|
||||
|
||||
# Default values
|
||||
|
||||
arg8=
arg16=
arg32=
nojit=
sim=
skip=
valgrind=

# This is in case the caller has set aliases (as I do - PH)
# NOTE(review): "unset" removes variables/functions with these names; real
# aliases would need "unalias" - confirm which was intended.
unset cp ls mv rm

# Process options and select which tests to run; for those that are explicitly
# requested, check that the necessary optional facilities are available.

do1=no
do2=no
#do3=no
#do4=no
#do5=no
#do6=no
#do7=no
#do8=no
#do9=no
#do10=no
#do11=no
#do12=no
#do13=no
#do14=no
#do15=no
#do16=no
#do17=no
#do18=no
#do19=no
#do20=no
#do21=no
#do22=no
#do23=no
#do24=no
#do25=no
#do26=no

###############################################################################
# process_args ARG...
#
# Interpret the script's command-line arguments.
#
# Globals read:    maxtest
# Globals written: do<N> (set to "yes" for each selected test), arg8, arg16,
#                  arg32, nojit, sim, valgrind, skip (space-separated list of
#                  test numbers given as ~N), and the scratch variables tf, tt
# Exits with status 1, after a message on stdout, for an unknown argument, a
# malformed or empty test range, or "sim" without a following command.
#
# "local" is not used and "expr" with backticks is kept, matching the rest of
# this /bin/sh script.
###############################################################################
process_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      1) do1=yes ;;
      2) do2=yes ;;
#     3) do3=yes;;
#     4) do4=yes;;
#     5) do5=yes;;
#     6) do6=yes;;
#     7) do7=yes;;
#     8) do8=yes;;
#     9) do9=yes;;
#     10) do10=yes;;
#     11) do11=yes;;
#     12) do12=yes;;
#     13) do13=yes;;
#     14) do14=yes;;
#     15) do15=yes;;
#     16) do16=yes;;
#     17) do17=yes;;
#     18) do18=yes;;
#     19) do19=yes;;
#     20) do20=yes;;
#     21) do21=yes;;
#     22) do22=yes;;
#     23) do23=yes;;
#     24) do24=yes;;
#     25) do25=yes;;
#     26) do26=yes;;
      -8) arg8=yes ;;
      -16) arg16=yes ;;
      -32) arg32=yes ;;
      nojit) nojit=yes ;;
      sim)
        # The simulator command is the next argument. Without this check a
        # trailing "sim" would shift past the end of the argument list.
        if [ $# -lt 2 ]; then
          echo "Missing simulator command after 'sim'"; exit 1
        fi
        shift
        sim=$1
        ;;
      valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all" ;;
      valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p " ;;
      ~*)
        # ~N excludes test N. Leading zeros are dropped so that "~01" clears
        # do1 rather than an unrelated variable do01.
        if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
          skip="$skip `expr "$1" : '~0*\([0-9][0-9]*\)$'`"
        else
          echo "Unknown option or test selector '$1'"; exit 1
        fi
        ;;
      *-*)
        # A range "F-T", or "F-" meaning F up to $maxtest.
        if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
          tf=`expr "$1" : '\([0-9]*\)'`
          tt=`expr "$1" : '.*-\([0-9]*\)'`
          if [ "$tt" = "" ]; then tt=$maxtest; fi
          # Normalize to plain decimal so that do${tf} names a real flag.
          tf=`expr "$tf" + 0`
          tt=`expr "$tt" + 0`
          # An empty range (F greater than T) is rejected too: it would select
          # nothing, and selecting nothing later means "run every test".
          if [ "$tf" -lt 1 ] || [ "$tt" -gt "$maxtest" ] || [ "$tf" -gt "$tt" ]; then
            echo "Invalid test range '$1'"; exit 1
          fi
          while [ "$tf" -le "$tt" ]; do
            eval "do${tf}=yes"
            tf=`expr "$tf" + 1`
          done
        else
          echo "Invalid test range '$1'"; exit 1
        fi
        ;;
      *) echo "Unknown option or test selector '$1'"; exit 1 ;;
    esac
    shift
  done
}

process_args "$@"
|
||||
|
||||
# Find which optional facilities are available.
|
||||
|
||||
# The exit status of "pcre2test -C linksize" is taken as the internal link
# size. Anything outside the range 2..4 is treated as a failure to find it.
$sim ./pcre2test -C linksize >/dev/null
link_size=$?
if [ "$link_size" -lt 2 ] || [ "$link_size" -gt 4 ]; then
  echo "Failed to find internal link size"
  exit 1
fi
|
||||
|
||||
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
|
||||
# one need be.
|
||||
|
||||
# Probe pcre2test for each code unit width. A non-zero exit status from
# "-C pcreN" is what the code below accepts as "the N-bit library exists".
$sim ./pcre2test -C pcre8 >/dev/null
support8=$?
$sim ./pcre2test -C pcre16 >/dev/null
support16=$?
$sim ./pcre2test -C pcre32 >/dev/null
support32=$?

# Start with every width skipped. A width that is to be tested gets the
# pcre2test option that selects it: empty for 8-bit, -16 or -32 otherwise.
test8=skip
test16=skip
test32=skip

if [ -z "$arg8$arg16$arg32" ]; then
  # No bitsize arguments: select all the widths that are available.
  if [ "$support8" -ne 0 ]; then test8=; fi
  if [ "$support16" -ne 0 ]; then test16=-16; fi
  if [ "$support32" -ne 0 ]; then test32=-32; fi
else
  # Explicit requests: asking for a width that was not compiled is an error.
  if [ "$arg8" = yes ]; then
    if [ "$support8" -eq 0 ]; then
      echo "Cannot run 8-bit library tests: 8-bit library not compiled"
      exit 1
    fi
    test8=
  fi
  if [ "$arg16" = yes ]; then
    if [ "$support16" -eq 0 ]; then
      echo "Cannot run 16-bit library tests: 16-bit library not compiled"
      exit 1
    fi
    test16=-16
  fi
  if [ "$arg32" = yes ]; then
    if [ "$support32" -eq 0 ]; then
      echo "Cannot run 32-bit library tests: 32-bit library not compiled"
      exit 1
    fi
    test32=-32
  fi
fi
|
||||
|
||||
# UTF support always applies to every bit size that is supported; we can't
|
||||
# have UTF-8 support without UTF-16 support (for example).
|
||||
|
||||
# Record the exit status of the UTF and JIT probes in $utf and $jit. The JIT
# option is passed to the tests only when the JIT probe returned non-zero and
# "nojit" was not given on the command line.
$sim ./pcre2test -C utf >/dev/null
utf=$?

jitopt=
$sim ./pcre2test -C jit >/dev/null
jit=$?
if [ "$jit" -ne 0 ] && [ "$nojit" != "yes" ]; then
  jitopt=-jit
fi
|
||||
|
||||
# If no specific tests were requested, select all. Those that are not
|
||||
# relevant will be automatically skipped.
|
||||
|
||||
# When no test was requested explicitly, every do<N> flag is still "no": turn
# them all on. The commented-out parts belong to tests that have not been
# converted yet.
if [ "$do1" = no ] && [ "$do2" = no ]; then
# -a $do3 = no -a $do4 = no -a \
# $do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
# $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
# $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
# $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
# $do25 = no -a $do26 = no

  do1=yes
  do2=yes
# do3=yes
# do4=yes
# do5=yes
# do6=yes
# do7=yes
# do8=yes
# do9=yes
# do10=yes
# do11=yes
# do12=yes
# do13=yes
# do14=yes
# do15=yes
# do16=yes
# do17=yes
# do18=yes
# do19=yes
# do20=yes
# do21=yes
# do22=yes
# do23=yes
# do24=yes
# do25=yes
# do26=yes
fi

# Handle any explicit skips at this stage, so that an argument list may consist
# only of explicit skips. $skip is left unquoted on purpose: it is a
# space-separated list of test numbers.
for i in $skip; do
  eval "do$i=no"
done
|
||||
|
||||
# Show which release and which test data
|
||||
|
||||
# Announce the run: where the test data comes from, followed by whatever
# pcre2test itself prints when it is given an empty input file.
echo ""
echo "PCRE2 C library tests using test data from $testdata"
$sim ./pcre2test /dev/null
echo ""
|
||||
|
||||
for bmode in "$test8" "$test16" "$test32"; do
|
||||
# Set $bits for the current library width and print its section header.
# Widths marked "skip" are passed over. A blank separator line is printed
# only when an earlier section was output: just 8-bit can precede 16-bit,
# while either 8-bit or 16-bit can precede 32-bit.
case "$bmode" in
  skip) continue ;;
  -16)
    if [ "$test8" != "skip" ]; then echo ""; fi
    bits=16; echo "---- Testing 16-bit library ----"; echo ""
    ;;
  -32)
    if [ "$test8$test16" != "skipskip" ]; then echo ""; fi
    bits=32; echo "---- Testing 32-bit library ----"; echo ""
    ;;
  *)
    bits=8; echo "---- Testing 8-bit library ----"; echo ""
    ;;
esac
|
||||
|
||||
# Primary test, compatible with JIT and all versions of Perl >= 5.10
|
||||
|
||||
# Test 1: run testinput1 without and (when $jitopt is set) with JIT, and
# compare the result against testoutput1. Any failure stops the script.
if [ "$do1" = yes ]; then
  echo "$title1"
  for opt in "" $jitopt; do
    # $sim, $valgrind, $bmode and $opt are deliberately unquoted: each may be
    # empty or hold several words. The data paths are quoted so that a
    # testdata directory containing spaces still works.
    $sim $valgrind ./pcre2test -q $bmode $opt "$testdata/testinput1" testtry || exit 1
    $cf "$testdata/testoutput1" testtry || exit 1
    if [ "$opt" = "-jit" ]; then
      echo " OK with JIT"
    else
      echo " OK"
    fi
  done
fi
|
||||
|
||||
# PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals
|
||||
|
||||
# Test 2: run testinput2 without and (when $jitopt is set) with JIT, and
# compare the result against testoutput2. If pcre2test itself fails, print a
# hint about stack usage before giving up.
if [ "$do2" = yes ]; then
  echo "$title2 (not UTF-$bits)"
  for opt in "" $jitopt; do
    # $sim, $valgrind, $bmode and $opt are deliberately unquoted: each may be
    # empty or hold several words. The data paths are quoted so that a
    # testdata directory containing spaces still works.
    if $sim $valgrind ./pcre2test -q $bmode $opt "$testdata/testinput2" testtry; then
      $cf "$testdata/testoutput2" testtry || exit 1
    else
      echo " "
      echo "** Test 2 requires a lot of stack. If it has crashed with a"
      echo "** segmentation fault, it may be that you do not have enough"
      echo "** stack available by default. Please see the 'pcre2stack' man"
      echo "** page for a discussion of PCRE2's stack usage."
      echo " "
      exit 1
    fi
    if [ "$opt" = "-jit" ]; then
      echo " OK with JIT"
    else
      echo " OK"
    fi
  done
fi
|
||||
|
||||
## Locale-specific tests, provided that either the "fr_FR" or the "french"
|
||||
## locale is available. The former is the Unix-like standard; the latter is
|
||||
## for Windows. Another possibility is "fr". Unfortunately, different versions
|
||||
## of the French locale give different outputs for some items. This test passes
|
||||
## if the output matches any one of the alternative output files.
|
||||
#
|
||||
#if [ $do3 = yes ] ; then
|
||||
# locale -a | grep '^fr_FR$' >/dev/null
|
||||
# if [ $? -eq 0 ] ; then
|
||||
# locale=fr_FR
|
||||
# infile=$testdata/testinput3
|
||||
# outfile=$testdata/testoutput3
|
||||
# outfile2=$testdata/testoutput3A
|
||||
# outfile3=$testdata/testoutput3B
|
||||
# else
|
||||
# infile=test3input
|
||||
# outfile=test3output
|
||||
# outfile2=test3outputA
|
||||
# outfile3=test3outputB
|
||||
# locale -a | grep '^french$' >/dev/null
|
||||
# if [ $? -eq 0 ] ; then
|
||||
# locale=french
|
||||
# sed 's/fr_FR/french/' $testdata/testinput3 >test3input
|
||||
# sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
|
||||
# sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA
|
||||
# sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB
|
||||
# else
|
||||
# locale -a | grep '^fr$' >/dev/null
|
||||
# if [ $? -eq 0 ] ; then
|
||||
# locale=fr
|
||||
# sed 's/fr_FR/fr/' $testdata/testinput3 >test3input
|
||||
# sed 's/fr_FR/fr/' $testdata/testoutput3 >test3output
|
||||
# sed 's/fr_FR/fr/' $testdata/testoutput3A >test3outputA
|
||||
# sed 's/fr_FR/fr/' $testdata/testoutput3B >test3outputB
|
||||
# else
|
||||
# locale=
|
||||
# fi
|
||||
# fi
|
||||
# fi
|
||||
#
|
||||
# if [ "$locale" != "" ] ; then
|
||||
# echo $title3 "(using '$locale' locale)"
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# if $cf $outfile testtry >teststdout || \
|
||||
# $cf $outfile2 testtry >teststdout || \
|
||||
# $cf $outfile3 testtry >teststdout
|
||||
# then
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# else
|
||||
# echo "** Locale test did not run successfully. The output did not match"
|
||||
# echo " $outfile, $outfile2 or $outfile3."
|
||||
# echo " This may mean that there is a problem with the locale settings rather"
|
||||
# echo " than a bug in PCRE."
|
||||
# exit 1
|
||||
# fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# done
|
||||
# else
|
||||
# echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr' or"
|
||||
# echo "'french' locales exist, or the \"locale\" command is not available"
|
||||
# echo "to check for them."
|
||||
# echo " "
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Additional tests for UTF support
|
||||
#
|
||||
#if [ $do4 = yes ] ; then
|
||||
# echo ${title4A}-${bits}${title4B}
|
||||
# if [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput4 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do5 = yes ] ; then
|
||||
# echo ${title5}-${bits} support
|
||||
# if [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput5 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do6 = yes ] ; then
|
||||
# echo $title6
|
||||
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput6 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput6 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Test non-Perl-compatible Unicode property support
|
||||
#
|
||||
#if [ $do7 = yes ] ; then
|
||||
# echo $title7
|
||||
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput7 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for DFA matching support
|
||||
#
|
||||
#if [ $do8 = yes ] ; then
|
||||
# echo $title8
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput8 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput8 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
#fi
|
||||
#
|
||||
#if [ $do9 = yes ] ; then
|
||||
# echo ${title9}-${bits}
|
||||
# if [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput9 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput9 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do10 = yes ] ; then
|
||||
# echo $title10
|
||||
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput10 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput10 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Test of internal offsets and code sizes. This test is run only when there
|
||||
## is Unicode property support and the link size is 2. The actual tests are
|
||||
## mostly the same as in some of the above, but in this test we inspect some
|
||||
## offsets and sizes that require a known link size. This is a doublecheck for
|
||||
## the maintainer, just in case something changes unexpectedly. The output from
|
||||
## this test is not the same in 8-bit and 16-bit modes.
|
||||
#
|
||||
#if [ $do11 = yes ] ; then
|
||||
# echo $title11
|
||||
# if [ $link_size -ne 2 ] ; then
|
||||
# echo " Skipped because link size is not 2"
|
||||
# elif [ $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput11-$bits testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Test JIT-specific features when JIT is available
|
||||
#
|
||||
#if [ $do12 = yes ] ; then
|
||||
# echo $title12
|
||||
# if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||
# echo " Skipped because JIT is not available or not usable"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput12 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput12 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Test JIT-specific features when JIT is not available
|
||||
#
|
||||
#if [ $do13 = yes ] ; then
|
||||
# echo $title13
|
||||
# if [ $jit -ne 0 ] ; then
|
||||
# echo " Skipped because JIT is available"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput13 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 8-bit-specific features
|
||||
#
|
||||
#if [ "$do14" = yes ] ; then
|
||||
# echo $title14
|
||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 16/32-bit tests"
|
||||
# else
|
||||
# cp -f $testdata/saved16 testsaved16
|
||||
# cp -f $testdata/saved32 testsaved32
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput14 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput14 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 8-bit-specific features (needs UTF-8 support)
|
||||
#
|
||||
#if [ "$do15" = yes ] ; then
|
||||
# echo $title15
|
||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 16/32-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput15 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput15 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 8-bit-specific features (Unicode property support)
|
||||
#
|
||||
#if [ $do16 = yes ] ; then
|
||||
# echo $title16
|
||||
# if [ "$bits" = "16" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 16/32-bit tests"
|
||||
# elif [ $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput16 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput16 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features
|
||||
#
|
||||
#if [ $do17 = yes ] ; then
|
||||
# echo $title17
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput17 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput17 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features (UTF-16/32 support)
|
||||
#
|
||||
#if [ $do18 = yes ] ; then
|
||||
# echo $title18
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput18 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput18-$bits testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features (Unicode property support)
|
||||
#
|
||||
#if [ $do19 = yes ] ; then
|
||||
# echo $title19
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# elif [ $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput19 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput19 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
|
||||
#
|
||||
#if [ $do20 = yes ] ; then
|
||||
# echo $title20
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput20 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput20 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for reloads with 16/32-bit library
|
||||
#
|
||||
#if [ $do21 = yes ] ; then
|
||||
# echo $title21
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# elif [ $link_size -ne 2 ] ; then
|
||||
# echo " Skipped because link size is not 2"
|
||||
# else
|
||||
# cp -f $testdata/saved8 testsaved8
|
||||
# cp -f $testdata/saved16LE-1 testsaved16LE-1
|
||||
# cp -f $testdata/saved16BE-1 testsaved16BE-1
|
||||
# cp -f $testdata/saved32LE-1 testsaved32LE-1
|
||||
# cp -f $testdata/saved32BE-1 testsaved32BE-1
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput21 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput21-$bits testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for reloads with 16/32-bit library (UTF-16 support)
|
||||
#
|
||||
#if [ $do22 = yes ] ; then
|
||||
# echo $title22
|
||||
# if [ "$bits" = "8" ] ; then
|
||||
# echo " Skipped when running 8-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# elif [ $link_size -ne 2 ] ; then
|
||||
# echo " Skipped because link size is not 2"
|
||||
# else
|
||||
# cp -f $testdata/saved16LE-2 testsaved16LE-2
|
||||
# cp -f $testdata/saved16BE-2 testsaved16BE-2
|
||||
# cp -f $testdata/saved32LE-2 testsaved32LE-2
|
||||
# cp -f $testdata/saved32BE-2 testsaved32BE-2
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput22 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput22-$bits testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do23 = yes ] ; then
|
||||
# echo $title23
|
||||
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 8/32-bit tests"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput23 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput23 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do24 = yes ] ; then
|
||||
# echo $title24
|
||||
# if [ "$bits" = "8" -o "$bits" = "32" ] ; then
|
||||
# echo " Skipped when running 8/32-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput24 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput24 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do25 = yes ] ; then
|
||||
# echo $title25
|
||||
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
|
||||
# echo " Skipped when running 8/16-bit tests"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput25 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput25 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do26 = yes ] ; then
|
||||
# echo $title26
|
||||
# if [ "$bits" = "8" -o "$bits" = "16" ] ; then
|
||||
# echo " Skipped when running 8/16-bit tests"
|
||||
# elif [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput26 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput26 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# echo " OK"
|
||||
# fi
|
||||
#fi
|
||||
|
||||
# End of loop for 8/16/32-bit tests
|
||||
done
|
||||
|
||||
# Clean up local working files
|
||||
rm -f test3input test3output test3outputA testNinput testsaved* teststderr teststdout testtry
|
||||
|
||||
# End
|
185
doc/pcre2test.1
185
doc/pcre2test.1
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "05 July 2014" "PCRE 10.00"
|
||||
.TH PCRE2TEST 1 "22 July 2014" "PCRE 10.00"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -51,7 +51,7 @@ before being passed to the library functions. Results are converted back to
|
|||
8-bit code units for output.
|
||||
.P
|
||||
In the rest of this document, the names of library functions and structures
|
||||
are given in generic form, for example, \fBpcre2_compile()\fP. The actual
|
||||
are given in generic form, for example, \fBpcre2_compile()\fP. The actual
|
||||
names used in the libraries have a suffix _8, _16, or _32, as appropriate.
|
||||
.
|
||||
.
|
||||
|
@ -130,8 +130,8 @@ form and information about the compiled pattern is output after compilation;
|
|||
\fB-d\fP is equivalent to \fB-b -i\fP.
|
||||
.TP 10
|
||||
\fB-dfa\fP
|
||||
Behave as if each subject line has the \fBdfa\fP modifier; matching is done
|
||||
using the \fBpcre2_dfa_match()\fP function instead of the default
|
||||
Behave as if each subject line has the \fBdfa\fP modifier; matching is done
|
||||
using the \fBpcre2_dfa_match()\fP function instead of the default
|
||||
\fBpcre2_match()\fP.
|
||||
.TP 10
|
||||
\fB-help\fP
|
||||
|
@ -141,6 +141,10 @@ Output a brief summary these options and then exit.
|
|||
Behave as if each pattern has the \fB/info\fP modifier; information about the
|
||||
compiled pattern is given after compilation.
|
||||
.TP 10
|
||||
\fB-jit\fP
|
||||
Behave as if each pattern line has the \fBjit\fP modifier; after successful
|
||||
compilation, each pattern is passed to the just-in-time compiler, if available.
|
||||
.TP 10
|
||||
\fB-pattern\fP \fImodifier-list\fP
|
||||
Behave as if each pattern line contains the given modifiers.
|
||||
.TP 10
|
||||
|
@ -152,7 +156,7 @@ On Unix-like systems, set the size of the run-time stack to \fIsize\fP
|
|||
megabytes.
|
||||
.TP 10
|
||||
\fB-subject\fP \fImodifier-list\fP
|
||||
Behave as if each subject line contains the given modifiers.
|
||||
Behave as if each subject line contains the given modifiers.
|
||||
.TP 10
|
||||
\fB-t\fP
|
||||
Run each compile and match many times with a timer, and output the resulting
|
||||
|
@ -191,7 +195,7 @@ the \fB-help\fP option states whether or not \fBreadline()\fP will be used.
|
|||
The program handles any number of tests, each of which consists of a set of
|
||||
input lines. Each set starts with a regular expression pattern, followed by any
|
||||
number of subject lines to be matched against that pattern. In between sets of
|
||||
test data, command lines that begin with a hash (#) character may appear. This
|
||||
test data, command lines that begin with a hash (#) character may appear. This
|
||||
file format, with some restrictions, can also be processed by the
|
||||
\fBperltest.pl\fP script that is distributed with PCRE2 as a means of checking
|
||||
that the behaviour of PCRE2 and Perl is the same.
|
||||
|
@ -212,52 +216,63 @@ still input to be read.
|
|||
.SH "COMMAND LINES"
|
||||
.rs
|
||||
.sp
|
||||
In between sets of test data, a line that begins with a hash (#) character is
|
||||
In between sets of test data, a line that begins with a hash (#) character is
|
||||
interpreted as a command line. If the first character is followed by white
|
||||
space or an exclamation mark, the line is treated as a comment, and ignored.
|
||||
space or an exclamation mark, the line is treated as a comment, and ignored.
|
||||
Otherwise, the following commands are recognized:
|
||||
.sp
|
||||
#forbid_utf
|
||||
.sp
|
||||
Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
|
||||
options set, which locks out the use of UTF and Unicode property features. This
|
||||
is a trigger guard that is used in test files to ensure that UTF/Unicode tests
|
||||
are not accidentally added to files that are used when UTF support is not
|
||||
included in the library. This effect can also be obtained by the use of
|
||||
\fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be unset, and
|
||||
the automatic options are not displayed in pattern information, to avoid
|
||||
cluttering up test output.
|
||||
.sp
|
||||
#load <file name>
|
||||
.sp
|
||||
Load a pre-compiled pattern that has been saved in a file. This command must be
|
||||
followed immediately by any subject lines that are to be matched by the
|
||||
Load a pre-compiled pattern that has been saved in a file. This command must be
|
||||
followed immediately by any subject lines that are to be matched by the
|
||||
pattern.
|
||||
.sp
|
||||
.sp
|
||||
#pattern <modifier-list>
|
||||
.sp
|
||||
This command sets a default modifier list that applies to all subsequent
|
||||
This command sets a default modifier list that applies to all subsequent
|
||||
patterns. Modifiers on a pattern can change these settings.
|
||||
.sp
|
||||
#perltest
|
||||
.sp
|
||||
The appearance of this line causes all subsequent modifier settings to be
|
||||
The appearance of this line causes all subsequent modifier settings to be
|
||||
checked for compatibility with the \fBperltest.pl\fP script, which is used to
|
||||
confirm that Perl gives the same results as PCRE2. Also, apart from comment
|
||||
lines, none of the other command lines are permitted, because they and many
|
||||
of the modifiers are specific to \fBpcre2test\fP, and should not be used in
|
||||
test files that are also processed by \fBperltest.pl\fP. The \fB#perltest\fP
|
||||
test files that are also processed by \fBperltest.pl\fP. The \fB#perltest\fP
|
||||
command helps detect tests that are accidentally put in the wrong file.
|
||||
.sp
|
||||
.sp
|
||||
#subject <modifier-list>
|
||||
.sp
|
||||
This command sets a default modifier list that applies to all subsequent
|
||||
subject lines. Modifiers on a subject line can change these settings.
|
||||
This command sets a default modifier list that applies to all subsequent
|
||||
subject lines. Modifiers on a subject line can change these settings.
|
||||
.
|
||||
.
|
||||
.SH "MODIFIER SYNTAX"
|
||||
.rs
|
||||
.sp
|
||||
Modifier lists are used with both pattern and subject lines. Items in a list
|
||||
are separated by commas and optional white space. Some modifiers may be given
|
||||
for both patterns and subject lines, whereas others are valid for one or the
|
||||
other only. Each modifier has a long name, for example "anchored", and some of
|
||||
Modifier lists are used with both pattern and subject lines. Items in a list
|
||||
are separated by commas and optional white space. Some modifiers may be given
|
||||
for both patterns and subject lines, whereas others are valid for one or the
|
||||
other only. Each modifier has a long name, for example "anchored", and some of
|
||||
them must be followed by an equals sign and a value, for example, "offset=12".
|
||||
Modifiers that do not take values may be preceded by a minus sign to turn off a
|
||||
Modifiers that do not take values may be preceded by a minus sign to turn off a
|
||||
previous default setting.
|
||||
.P
|
||||
A few of the more common modifiers can also be specified as single or double
|
||||
letters, for example "i" for "caseless". In documentation, following the Perl
|
||||
convention, these are written with a slash ("the /i modifier") for clarity.
|
||||
convention, these are written with a slash ("the /i modifier") for clarity.
|
||||
Abbreviated modifiers must all be concatenated in the first item of a modifier
|
||||
list. If the first item is not recognized as a long modifier name, it is
|
||||
interpreted as a sequence of these abbreviations. For example:
|
||||
|
@ -340,28 +355,29 @@ possible to construct invalid UTF-16 sequences for testing purposes.
|
|||
In UTF-32 mode, all 4- to 8-digit \ex{...} values are accepted. This makes it
|
||||
possible to construct invalid UTF-32 sequences for testing purposes.
|
||||
.P
|
||||
There is a special backslash sequence that specifies replication of one or more
|
||||
There is a special backslash sequence that specifies replication of one or more
|
||||
characters:
|
||||
.sp
|
||||
\e[<characters>]{<count>}
|
||||
.sp
|
||||
This makes it possible to test long strings without having to provide them as
|
||||
This makes it possible to test long strings without having to provide them as
|
||||
part of the file. For example:
|
||||
.sp
|
||||
\e[abc]{4}
|
||||
.sp
|
||||
is converted to "abcabcabcabc". This feature does not support nesting. To
|
||||
include a closing square bracket in the characters, code it as \ex5D.
|
||||
is converted to "abcabcabcabc". This feature does not support nesting. To
|
||||
include a closing square bracket in the characters, code it as \ex5D.
|
||||
.P
|
||||
A backslash followed by an equals sign marks the end of the subject string and
|
||||
A backslash followed by an equals sign marks the end of the subject string and
|
||||
the start of a modifier list. For example:
|
||||
.sp
|
||||
abc\=notbol,notempty
|
||||
abc\=notbol,notempty
|
||||
.sp
|
||||
A backslash followed by anything else causes an error. However, if the very
|
||||
last character in the line is a backslash (and there is no modifier list), it
|
||||
is ignored. This gives a way of passing an empty line as data, since a real
|
||||
empty line terminates the data input.
|
||||
A backslash followed by any other non-alphanumeric character just escapes that
|
||||
character. A backslash followed by anything else causes an error. However, if
|
||||
the very last character in the line is a backslash (and there is no modifier
|
||||
list), it is ignored. This gives a way of passing an empty line as data, since
|
||||
a real empty line terminates the data input.
|
||||
.
|
||||
.
|
||||
.SH "PATTERN MODIFIERS"
|
||||
|
@ -375,7 +391,7 @@ can add to or override default modifiers that were set by a previous
|
|||
.SS "Setting compilation options"
|
||||
.rs
|
||||
.sp
|
||||
The following modifiers set options for \fBpcre2_compile()\fP. The most common
|
||||
The following modifiers set options for \fBpcre2_compile()\fP. The most common
|
||||
ones have single-letter abbreviations. See
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
|
@ -421,10 +437,10 @@ about the pattern:
|
|||
flipbytes flip endianness
|
||||
/BB fullbincode show binary code with lengths
|
||||
/I info show info about compiled pattern
|
||||
hex pattern is coded in hexadecimal
|
||||
hex pattern is coded in hexadecimal
|
||||
jit[=<number>] use JIT
|
||||
locale=<name> use this locale
|
||||
memory show memory used
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
parens_nest_limit=<n> set maximum parentheses depth
|
||||
perlcompat lock out non-Perl modifiers
|
||||
|
@ -432,7 +448,7 @@ about the pattern:
|
|||
save=<file name> save compiled pattern
|
||||
stackguard=<number> test the stackguard feature
|
||||
tables=[0|1|2] select internal tables
|
||||
use_length use the pattern's length
|
||||
use_length use the pattern's length
|
||||
.sp
|
||||
The effects of these modifiers are described in the following sections.
|
||||
FIXME: Give more examples.
|
||||
|
@ -441,23 +457,23 @@ FIXME: Give more examples.
|
|||
.SS "Newline and \eR handling"
|
||||
.rs
|
||||
.sp
|
||||
The \fBbsr\fP modifier specifies what \eR in a pattern should match. If it is
|
||||
set to "anycrlf", \eR matches CR, LF, or CRLF only. If it is set to "unicode",
|
||||
\eR matches any Unicode newline sequence. The default is specified when PCRE2
|
||||
The \fBbsr\fP modifier specifies what \eR in a pattern should match. If it is
|
||||
set to "anycrlf", \eR matches CR, LF, or CRLF only. If it is set to "unicode",
|
||||
\eR matches any Unicode newline sequence. The default is specified when PCRE2
|
||||
is built, with the default default being Unicode.
|
||||
.P
|
||||
The \fBnewline\fP modifier specifies which characters are to be interpreted as
|
||||
The \fBnewline\fP modifier specifies which characters are to be interpreted as
|
||||
newlines, both in the pattern and (by default) in subject lines. The type must
|
||||
be one of CR, LF, CRLF, ANYCRLF, or ANY.
|
||||
.P
|
||||
Both the \eR and newline settings can be changed at match time, but if this is
|
||||
Both the \eR and newline settings can be changed at match time, but if this is
|
||||
done, JIT matching is disabled.
|
||||
.
|
||||
.
|
||||
.SS "Information about a pattern"
|
||||
.rs
|
||||
.sp
|
||||
The \fBdebug\fP modifier is a shorthand for \fBinfo,fullbincode\fP, requesting
|
||||
The \fBdebug\fP modifier is a shorthand for \fBinfo,fullbincode\fP, requesting
|
||||
all available information.
|
||||
.P
|
||||
The \fBbincode\fP modifier causes a representation of the compiled code to be
|
||||
|
@ -466,12 +482,12 @@ values, which ensures that the same output is generated for different internal
|
|||
link sizes and different code unit widths. By using \fBbincode\fP, the same
|
||||
regression tests can be used in different environments.
|
||||
.P
|
||||
The \fBfullbincode\fP modifier, by contrast, \fIdoes\fP include length and
|
||||
The \fBfullbincode\fP modifier, by contrast, \fIdoes\fP include length and
|
||||
offset values. This is used in a few special tests and is also useful for
|
||||
one-off tests.
|
||||
.P
|
||||
The \fBinfo\fP modifier requests information about the compiled pattern
|
||||
(whether it is anchored, has a fixed first character, and so on). The
|
||||
(whether it is anchored, has a fixed first character, and so on). The
|
||||
information is obtained from the \fBpcre2_pattern_info()\fP function.
|
||||
.
|
||||
.
|
||||
|
@ -490,21 +506,21 @@ below.
|
|||
.SS "Specifying a pattern in hex"
|
||||
.rs
|
||||
.sp
|
||||
The \fBhex\fP modifier specifies that the characters of the pattern are to be
|
||||
The \fBhex\fP modifier specifies that the characters of the pattern are to be
|
||||
interpreted as pairs of hexadecimal digits. White space is permitted between
|
||||
pairs. For example:
|
||||
.sp
|
||||
/ab 32 59/hex
|
||||
.sp
|
||||
This feature is provided as a way of creating patterns that contain binary zero
|
||||
This feature is provided as a way of creating patterns that contain binary zero
|
||||
characters. When \fBhex\fP is set, it implies \fBuse_length\fP.
|
||||
.
|
||||
.
|
||||
.SS "Using the pattern's length"
|
||||
.rs
|
||||
.sp
|
||||
By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
|
||||
\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the
|
||||
By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
|
||||
\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the
|
||||
length of the pattern is passed. This is implied if \fBhex\fP is set.
|
||||
.
|
||||
.
|
||||
|
@ -549,7 +565,7 @@ character tables for the locale, and this is then passed to
|
|||
\fBpcre2_compile()\fP when compiling the regular expression. The same tables
|
||||
are used when matching the following subject lines. The \fB/locale\fP modifier
|
||||
applies only to the pattern on which it appears, but can be given in a
|
||||
\fB#pattern\fP command if a default is needed. Setting a locale and alternate
|
||||
\fB#pattern\fP command if a default is needed. Setting a locale and alternate
|
||||
character tables are mutually exclusive.
|
||||
.
|
||||
.
|
||||
|
@ -566,7 +582,7 @@ also output.
|
|||
.SS "Limiting nested parentheses"
|
||||
.rs
|
||||
.sp
|
||||
The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested
|
||||
The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested
|
||||
parentheses in a pattern. Breaching the limit causes a compilation error.
|
||||
.
|
||||
.
|
||||
|
@ -594,14 +610,17 @@ below. All other modifiers cause an error.
|
|||
.rs
|
||||
.sp
|
||||
The \fB/stackguard\fP modifier is used to test the use of
|
||||
\fBpcre2_stack_guard\fP. It must be followed by '0' or '1', specifying the
|
||||
return code to be given from an external function that is passed to PCRE2 and
|
||||
used for stack checking during compilation (see the
|
||||
\fBpcre2_set_compile_recursion_guard()\fP, a function that is provided to
|
||||
enable stack availability to be checked during compilation (see the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
documentation for details). FIXME: this needs doing properly once the test is
|
||||
implemented. Mention nested parens limit.
|
||||
documentation for details). If the number specified by the modifier is greater
|
||||
than zero, \fBpcre2_set_compile_recursion_guard()\fP is called to set up
|
||||
a callback from \fBpcre2_compile()\fP to a local function. The argument it is
|
||||
passed is the current nesting parenthesis depth; if this is greater than the
|
||||
value given by the modifier, non-zero is returned, causing the compilation to
|
||||
be aborted.
|
||||
.
|
||||
.
|
||||
.SS "Using alternative character tables"
|
||||
|
@ -618,7 +637,7 @@ different character tables. The digit specifies the tables as follows:
|
|||
2 a set of tables defining ISO 8859 characters
|
||||
.sp
|
||||
In table 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Setting alternate character tables and a locale
|
||||
letters, digits, spaces, etc. Setting alternate character tables and a locale
|
||||
are mutually exclusive.
|
||||
.
|
||||
.
|
||||
|
@ -635,24 +654,24 @@ not affect the compilation process.
|
|||
allcaptures show all captures
|
||||
/gg altglobal alternative global matching
|
||||
/g global global matching
|
||||
jitverify verify JIT usage
|
||||
jitverify verify JIT usage
|
||||
mark show mark values
|
||||
.sp
|
||||
These modifiers may not appear in a \fB#pattern\fP command. If you want them as
|
||||
These modifiers may not appear in a \fB#pattern\fP command. If you want them as
|
||||
defaults, set them in a \fB#subject\fP command.
|
||||
.
|
||||
.
|
||||
.SH "SUBJECT MODIFIERS"
|
||||
.rs
|
||||
.sp
|
||||
The modifiers that can appear in subject lines and the \fB#subject\fP
|
||||
The modifiers that can appear in subject lines and the \fB#subject\fP
|
||||
command are of two types.
|
||||
.
|
||||
.
|
||||
.SS "Setting match options"
|
||||
.rs
|
||||
.sp
|
||||
The following modifiers set options for \fBpcre2_match()\fP or
|
||||
The following modifiers set options for \fBpcre2_match()\fP or
|
||||
\fBpcre2_dfa_match()\fP. See
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
|
@ -674,7 +693,7 @@ for a description of their effects.
|
|||
If the \fB/posix\fP modifier was present on the pattern, causing the POSIX
|
||||
wrapper API to be used, the only option-setting modifiers that have any effect
|
||||
are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, causing REG_NOTBOL,
|
||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP.
|
||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP.
|
||||
Any other modifiers cause an error.
|
||||
.
|
||||
.SS "Setting match controls"
|
||||
|
@ -691,7 +710,7 @@ pattern.
|
|||
/gg altglobal alternative global matching
|
||||
bsr=[anycrlf|unicode] specify \eR handling
|
||||
callout_capture show captures at callout time
|
||||
callout_data=<n> set a value to pass via callouts
|
||||
callout_data=<n> set a value to pass via callouts
|
||||
callout_fail=<n>[:<m>] control callout failure
|
||||
callout_none do not supply a callout function
|
||||
copy=<number or name> copy captured substring
|
||||
|
@ -717,9 +736,9 @@ FIXME: Give more examples.
|
|||
.SS "Newline and \eR handling"
|
||||
.rs
|
||||
.sp
|
||||
These modifiers set the newline and \eR processing conventions for the subject
|
||||
line, overriding any values that were set at compile time (as described above).
|
||||
JIT matching is disabled if these settings are changed at match time.
|
||||
These modifiers set the newline and \eR processing conventions for the subject
|
||||
line, overriding any values that were set at compile time (as described above).
|
||||
JIT matching is disabled if these settings are changed at match time.
|
||||
.
|
||||
.
|
||||
.SS "Showing more text"
|
||||
|
@ -751,31 +770,31 @@ A callout function is supplied when \fBpcre2test\fP calls the library matching
|
|||
functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is
|
||||
set, the current captured groups are output when a callout occurs.
|
||||
.P
|
||||
The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
|
||||
only one number, 1 is returned instead of 0 when a callout of that number is
|
||||
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
||||
The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
|
||||
only one number, 1 is returned instead of 0 when a callout of that number is
|
||||
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
||||
for the <m>th time.
|
||||
.P
|
||||
The \fBcallout_data\fP modifier can be given an unsigned or a negative number.
|
||||
Any value other than zero is used as a return from \fBpcre2test\fP's callout
|
||||
The \fBcallout_data\fP modifier can be given an unsigned or a negative number.
|
||||
Any value other than zero is used as a return from \fBpcre2test\fP's callout
|
||||
function.
|
||||
.
|
||||
.
|
||||
.SS "Testing substring extraction functions"
|
||||
.rs
|
||||
.sp
|
||||
The \fBcopy\fP and \fBget\fP modifiers can be used to test the
|
||||
The \fBcopy\fP and \fBget\fP modifiers can be used to test the
|
||||
\fBpcre2_substring_copy_xxx()\fP and \fBpcre2_substring_get_xxx()\fP functions.
|
||||
They can be given more than once, and each can specify a group name or number,
|
||||
for example:
|
||||
.sp
|
||||
abcd\=copy=1,copy=3,get=G1
|
||||
.sp
|
||||
If the \fB#subject\fP command is used to set default copy and get lists, these
|
||||
can be unset by specifying a negative number for numbered groups and an empty
|
||||
If the \fB#subject\fP command is used to set default copy and get lists, these
|
||||
can be unset by specifying a negative number for numbered groups and an empty
|
||||
name for named groups.
|
||||
.P
|
||||
The \fBgetall\fP modifier tests \fBpcre2_substring_list_get()\fP, which
|
||||
The \fBgetall\fP modifier tests \fBpcre2_substring_list_get()\fP, which
|
||||
extracts all captured substrings.
|
||||
.P
|
||||
If the subject line is successfully matched, the substrings extracted by the
|
||||
|
@ -820,7 +839,7 @@ default 32K is necessary only for very complicated patterns.
|
|||
.SS "Setting match and recursion limits"
|
||||
.rs
|
||||
.sp
|
||||
The \fBmatch_limit\fP and \fBrecursion_limit\fP modifiers set the appropriate
|
||||
The \fBmatch_limit\fP and \fBrecursion_limit\fP modifiers set the appropriate
|
||||
limits in the match context. These values are ignored when the
|
||||
\fBfind_limits\fP modifier is specified.
|
||||
.
|
||||
|
@ -857,23 +876,23 @@ is added to the non-match message.
|
|||
.SS "Showing memory usage"
|
||||
.rs
|
||||
.sp
|
||||
The \fBmemory\fP modifier causes \fBpcre2test\fP to log all memory allocation
|
||||
The \fBmemory\fP modifier causes \fBpcre2test\fP to log all memory allocation
|
||||
and freeing calls that occur during a match operation.
|
||||
.
|
||||
.
|
||||
.SS "Setting a starting offset"
|
||||
.rs
|
||||
.sp
|
||||
The \fBoffset\fP modifier sets an offset in the subject string at which
|
||||
The \fBoffset\fP modifier sets an offset in the subject string at which
|
||||
matching starts. Its value is a number of code units, not characters.
|
||||
.
|
||||
.
|
||||
.SS "Setting the size of the output vector"
|
||||
.rs
|
||||
.sp
|
||||
The \fBovector\fP modifier applies only to the subject line in which it
|
||||
appears, though of course it can also be used to set a default in a
|
||||
\fB#subject\fP command. It specifies the number of pairs of offsets that are
|
||||
The \fBovector\fP modifier applies only to the subject line in which it
|
||||
appears, though of course it can also be used to set a default in a
|
||||
\fB#subject\fP command. It specifies the number of pairs of offsets that are
|
||||
available for storing matching information. The default is 15.
|
||||
.
|
||||
.
|
||||
|
@ -909,7 +928,7 @@ Otherwise, it outputs "No match" when the return is PCRE2_ERROR_NOMATCH, or
|
|||
return is PCRE2_ERROR_PARTIAL. (Note that this is the
|
||||
entire substring that was inspected during the partial match; it may include
|
||||
characters before the actual match start if a lookbehind assertion, \eK, \eb,
|
||||
or \eB was involved.)
|
||||
or \eB was involved.)
|
||||
.P
|
||||
For any other return, \fBpcre2test\fP outputs the PCRE2
|
||||
negative error number and a short descriptive phrase. If the error is a failed
|
||||
|
@ -1210,6 +1229,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 05 July 2014
|
||||
Last updated: 22 July 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -0,0 +1,247 @@
|
|||
#! /usr/bin/env perl
|
||||
|
||||
# Program for testing regular expressions with perl to check that PCRE2 handles
|
||||
# them the same. This version needs to have "use utf8" at the start for running
|
||||
# the UTF-8 tests, but *not* for the other tests. The only way I've found for
|
||||
# doing this is to cat this line in explicitly in the RunPerlTest script. I've
|
||||
# also used this method to supply "require Encode" for the UTF-8 tests, so that
|
||||
# the main test will still run where Encode is not installed.
|
||||
|
||||
#use utf8;
|
||||
#require Encode;
|
||||
|
||||
# Function for turning a string into a string of printing chars.
|
||||
|
||||
# pchars(STRING)
#
# Returns a copy of STRING in which every character whose code is outside the
# range 32..126 is replaced by a hexadecimal escape. When the global $utf8 is
# true the string is unpacked with 'U*' and escapes are written as \x{hh...};
# otherwise each character is escaped as \xhh using ord().
#
# All working variables are lexical, so calling this function does not
# overwrite any package globals.

sub pchars {
my ($str) = @_;
my $t = "";

if ($utf8)
  {
  foreach my $cp (unpack('U*', $str))
    {
    if ($cp >= 32 && $cp < 127) { $t .= chr $cp; }
      else { $t .= sprintf("\\x{%02x}", $cp); }
    }
  }
else
  {
  foreach my $ch (split(//, $str))
    {
    my $code = ord $ch;
    if ($code >= 32 && $code < 127) { $t .= $ch; }
      else { $t .= sprintf("\\x%02x", $code); }
    }
  }

return $t;
}
|
||||
|
||||
|
||||
# Read lines from named file or stdin and write to named file or stdout; lines
|
||||
# consist of a regular expression, in delimiters and optionally followed by
|
||||
# options, followed by a set of test data, terminated by an empty line.
|
||||
|
||||
# Sort out the input and output files
|
||||
|
||||
# Choose the input stream: the first command-line argument if there is one,
# otherwise standard input. $infile holds the *name* of the filehandle; the
# rest of the script compares it with "STDIN" to decide whether to prompt.
#
# The three-argument form of open() is used so that characters in the file
# name are never interpreted as part of the open mode.

if (@ARGV > 0)
  {
  open(INFILE, "<", $ARGV[0]) || die "Failed to open $ARGV[0]\n";
  $infile = "INFILE";
  }
else { $infile = "STDIN"; }

# Choose the output stream in the same way, from the second argument.

if (@ARGV > 1)
  {
  open(OUTFILE, ">", $ARGV[1]) || die "Failed to open $ARGV[1]\n";
  $outfile = "OUTFILE";
  }
else { $outfile = "STDOUT"; }

# Banner line containing the version number of the running Perl ($]).

printf($outfile "Perl $] Regular Expressions\n\n");
|
||||
|
||||
# Main loop
|
||||
|
||||
# Each iteration of the outer loop handles one test: it reads a delimited
# pattern (which may span several lines), strips the pcre2test-only modifiers,
# checks that Perl can compile what is left, and then matches each following
# data line against it until an empty line or end of input is reached.
#
# Text that comes from the input, or from $@, is written with print rather
# than printf, so that '%' characters in it are never treated as format
# directives.

NEXT_RE:
for (;;)
  {
  printf " re> " if $infile eq "STDIN";
  last if ! ($_ = <$infile>);
  print $outfile $_ if $infile ne "STDIN";
  next if ($_ =~ /^\s*$/ || $_ =~ /^#/);

  $pattern = $_;

  # Keep reading lines until the opening delimiter has a matching closing
  # delimiter.

  while ($pattern !~ /^\s*(.).*\1/s)
    {
    printf " > " if $infile eq "STDIN";
    last if ! ($_ = <$infile>);
    print $outfile $_ if $infile ne "STDIN";
    $pattern .= $_;
    }

  chomp($pattern);
  $pattern =~ s/\s+$//;

  # Split the pattern from the modifiers and adjust them as necessary.

  $pattern =~ /^\s*((.).*\2)(.*)$/s;
  $pat = $1;
  $mod = $3;

  # The private "aftertext" modifier means "print $' afterwards". The optional
  # "all" prefix is consumed as well, so that "allaftertext" does not leave a
  # stray "all" behind in the modifier string; $showrest is set in either
  # case.

  $showrest = ($mod =~ s/(?:all)?aftertext,?//);

  # "allaftertext" is used by pcre2test to print remainders after captures;
  # remove any further occurrence.

  $mod =~ s/allaftertext,?//;

  # Detect utf

  $utf8 = $mod =~ s/utf,?//;

  # Remove "dupnames".

  $mod =~ s/dupnames,?//;

  # Remove "mark" (asks pcre2test to check MARK data)

  $mod =~ s/mark,?//;

  # "ucp" asks pcre2test to set PCRE_UCP; change this to /u for Perl

  $mod =~ s/W(?=[a-zA-Z]*$)/u/;

  # Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2
  # optimizations)

  $mod =~ s/no_auto_possess,?//;
  $mod =~ s/no_start_optimize,?//;

  # Add back retained modifiers and check that the pattern is valid.

  $mod =~ s/,//g;
  $pattern = "$pat$mod";
  eval "\$_ =~ ${pattern}";
  if ($@)
    {
    print $outfile "Error: $@";

    # When reading from a file, skip the data lines that belong to the bad
    # pattern, up to the next blank line. $infile holds a filehandle name, so
    # it must be compared as a string.

    if ($infile ne "STDIN")
      {
      for (;;)
        {
        last if ! ($_ = <$infile>);
        last if $_ =~ /^\s*$/;
        }
      }
    next NEXT_RE;
    }

  # If the /g modifier is present, we want to put a loop round the matching;
  # otherwise just a single "if".

  $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";

  # If the pattern is actually the null string, Perl uses the most recently
  # executed (and successfully compiled) regex instead. This is a nasty trap
  # for the unwary! The PCRE2 test suite does contain null strings in places -
  # if they are allowed through here all sorts of weird and unexpected effects
  # happen. To avoid this, we replace such patterns with a non-null pattern
  # that has the same effect.

  $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);

  # Read data lines and test them

  for (;;)
    {
    printf "data> " if $infile eq "STDIN";
    last NEXT_RE if ! ($_ = <$infile>);
    chomp;
    print $outfile "$_\n" if $infile ne "STDIN";

    s/\s+$//;  # Remove trailing space
    s/^\s+//;  # Remove leading space
    s/\\Y//g;  # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)

    last if ($_ eq "");
    $x = eval "\"$_\"";   # To get escapes processed

    # Empty array for holding results, ensure $REGERROR and $REGMARK are
    # unset, then do the matching.

    @subs = ();

    # Code text that is appended to the generated match statement: for each
    # successful match it pushes 18 values onto @subs, namely $&, $1 to $16,
    # and $'.

    $pushes = "push \@subs,\$&;" .
              join("", map { "push \@subs,\$$_;" } 1..16) .
              "push \@subs,\$'; }";

    undef $REGERROR;
    undef $REGMARK;

    eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;

    if ($@)
      {
      print $outfile "Error: $@\n";
      next NEXT_RE;
      }
    elsif (scalar(@subs) == 0)
      {
      printf $outfile "No match";
      if (defined $REGERROR && $REGERROR != 1)
        { printf $outfile (", mark = %s", &pchars($REGERROR)); }
      printf $outfile "\n";
      }
    else
      {
      # Each match occupies 18 consecutive entries in @subs: entry 0 is the
      # whole match, entries 1 to 16 are the captures, and entry 17 is the
      # text after the match.

      while (scalar(@subs) != 0)
        {
        printf $outfile (" 0: %s\n", &pchars($subs[0]));
        printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
        $last_printed = 0;
        for ($i = 1; $i <= 16; $i++)
          {
          if (defined $subs[$i])
            {
            # Report any undefined groups below this one as <unset>.
            while ($last_printed++ < $i-1)
              { printf $outfile ("%2d: <unset>\n", $last_printed); }
            printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
            $last_printed = $i;
            }
          }
        splice(@subs, 0, 18);
        }

      # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
      # set and the input pattern was a UTF-8 string. We can, however, force
      # it to be so marked.

      if (defined $REGMARK && $REGMARK != 1)
        {
        $xx = $REGMARK;
        $xx = Encode::decode_utf8($xx) if $utf8;
        printf $outfile ("MK: %s\n", &pchars($xx));
        }
      }
    }
  }
|
||||
|
||||
# printf $outfile "\n";
|
||||
|
||||
# End
|
|
@ -561,7 +561,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77 };
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 };
|
||||
|
||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||
|
@ -1703,10 +1703,10 @@ else
|
|||
ptr += 4;
|
||||
if (utf)
|
||||
{
|
||||
if (c > 0x10ffffU) *errorcodeptr = ERR76;
|
||||
if (c > 0x10ffffU) *errorcodeptr = ERR77;
|
||||
else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
|
||||
}
|
||||
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR76;
|
||||
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1815,12 +1815,11 @@ else
|
|||
recommended to avoid the ambiguities in the old syntax.
|
||||
|
||||
Outside a character class, the digits are read as a decimal number. If the
|
||||
number is less than 8 (used to be 10), or if there are that many previous
|
||||
extracting left brackets, then it is a back reference. Otherwise, up to
|
||||
three octal digits are read to form an escaped byte. Thus \123 is likely to
|
||||
be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
|
||||
the octal value is greater than 377, the least significant 8 bits are
|
||||
taken. \8 and \9 are treated as the literal characters 8 and 9.
|
||||
number is less than 10, or if there are that many previous extracting left
|
||||
brackets, it is a back reference. Otherwise, up to three octal digits are
|
||||
read to form an escaped byte. Thus \123 is likely to be octal 123 (cf
|
||||
\0123, which is octal 012 followed by the literal 3). If the octal value is
|
||||
greater than 377, the least significant 8 bits are taken.
|
||||
|
||||
Inside a character class, \ followed by a digit is always either a literal
|
||||
8 or 9 or an octal number. */
|
||||
|
@ -1832,7 +1831,7 @@ else
|
|||
{
|
||||
oldptr = ptr;
|
||||
/* The integer range is limited by the machine's int representation. */
|
||||
s = (int)(c -CHAR_0);
|
||||
s = (int)(c - CHAR_0);
|
||||
overflow = FALSE;
|
||||
while (IS_DIGIT(ptr[1]))
|
||||
{
|
||||
|
@ -1849,7 +1848,7 @@ else
|
|||
*errorcodeptr = ERR61;
|
||||
break;
|
||||
}
|
||||
if (s < 8 || s <= cb->bracount) /* Check for back reference */
|
||||
if (s < 10 || s <= cb->bracount) /* Check for back reference */
|
||||
{
|
||||
escape = -s;
|
||||
break;
|
||||
|
@ -1886,7 +1885,7 @@ else
|
|||
|
||||
case CHAR_o:
|
||||
if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else
|
||||
if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR77; else
|
||||
if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else
|
||||
{
|
||||
ptr += 2;
|
||||
c = 0;
|
||||
|
@ -1947,7 +1946,7 @@ else
|
|||
ptr += 2;
|
||||
if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
*errorcodeptr = ERR77;
|
||||
*errorcodeptr = ERR78;
|
||||
break;
|
||||
}
|
||||
c = 0;
|
||||
|
@ -1955,12 +1954,12 @@ else
|
|||
|
||||
while ((cc = XDIGIT(*ptr)) != 0xff)
|
||||
{
|
||||
ptr++;
|
||||
if (c == 0 && cc == 0) continue; /* Leading zeroes */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c >= 0x10000000l) { overflow = TRUE; break; }
|
||||
#endif
|
||||
c = (c << 4) | cc;
|
||||
ptr++;
|
||||
if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR))
|
||||
{
|
||||
overflow = TRUE;
|
||||
|
@ -2002,9 +2001,9 @@ else
|
|||
break;
|
||||
|
||||
/* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
|
||||
An error is given if the byte following \c is not an ASCII character. This
|
||||
coding is ASCII-specific, but then the whole concept of \cx is
|
||||
ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
|
||||
An error is given if the byte following \c is not a printable ASCII
|
||||
character. This coding is ASCII-specific, but then the whole concept of \cx
|
||||
is ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
|
||||
|
||||
case CHAR_c:
|
||||
c = *(++ptr);
|
||||
|
@ -2014,7 +2013,7 @@ else
|
|||
break;
|
||||
}
|
||||
#ifndef EBCDIC /* ASCII/UTF-8 coding */
|
||||
if (c > 127) /* Excludes all non-ASCII in either mode */
|
||||
if (c < 32 || c > 126) /* Excludes all non-printable ASCII */
|
||||
{
|
||||
*errorcodeptr = ERR68;
|
||||
break;
|
||||
|
@ -3820,7 +3819,7 @@ for (;; ptr++)
|
|||
{
|
||||
ptr += 2;
|
||||
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
|
||||
{ ptr += 2; goto CONTINUE_CLASS; }
|
||||
{ ptr += 2; continue; }
|
||||
inescq = TRUE;
|
||||
break;
|
||||
}
|
||||
|
@ -4981,7 +4980,7 @@ for (;; ptr++)
|
|||
arglen = (int)(ptr - arg);
|
||||
if ((unsigned int)arglen > MAX_MARK)
|
||||
{
|
||||
*errorcodeptr = ERR75;
|
||||
*errorcodeptr = ERR76;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
|
@ -6533,10 +6532,10 @@ Arguments:
|
|||
reset_bracount TRUE to reset the count for each branch
|
||||
skipunits skip this many code units at start (for brackets and OP_COND)
|
||||
cond_depth depth of nesting for conditional subpatterns
|
||||
firstcuptr place to put the first required code unit
|
||||
firstcuflagsptr place to put the first code unit flags, or a negative number
|
||||
reqcuptr place to put the last required code unit
|
||||
reqcuflagsptr place to put the last required code unit flags, or a negative number
|
||||
firstcuptr place to put the first required code unit
|
||||
firstcuflagsptr place to put the first code unit flags, or a negative number
|
||||
reqcuptr place to put the last required code unit
|
||||
reqcuflagsptr place to put the last required code unit flags, or a negative number
|
||||
bcptr pointer to the chain of currently open branches
|
||||
cb points to the data block with tables pointers etc.
|
||||
lengthptr NULL during the real compile phase
|
||||
|
@ -6548,10 +6547,9 @@ Returns: TRUE on success
|
|||
static BOOL
|
||||
compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr,
|
||||
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipunits,
|
||||
int cond_depth,
|
||||
uint32_t *firstcuptr, int32_t *firstcuflagsptr,
|
||||
uint32_t *reqcuptr, int32_t *reqcuflagsptr,
|
||||
branch_chain *bcptr, compile_block *cb, size_t *lengthptr)
|
||||
int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
|
||||
uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
|
||||
compile_block *cb, size_t *lengthptr)
|
||||
{
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
PCRE2_UCHAR *code = *codeptr;
|
||||
|
@ -6569,15 +6567,13 @@ unsigned int orig_bracount;
|
|||
unsigned int max_bracount;
|
||||
branch_chain bc;
|
||||
|
||||
#ifdef FIXME
|
||||
/* If set, call the external function that checks for stack availability. */
|
||||
|
||||
if (ccontext->stack_guard != NULL && ccontext->stack_guard(0))
|
||||
if (cb->cx->stack_guard != NULL && cb->cx->stack_guard(cb->parens_depth))
|
||||
{
|
||||
*errorcodeptr= ERR33;
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Miscellaneous initialization */
|
||||
|
||||
|
@ -7434,7 +7430,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
|
|||
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
|
||||
c = c*10 + ptr[pp++] - CHAR_0;
|
||||
}
|
||||
if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) goto END_PSO;
|
||||
if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
errorcode = ERR60;
|
||||
goto HAD_ERROR;
|
||||
}
|
||||
if (p->type == PSO_LIMM) limit_match = c;
|
||||
else limit_recursion = c;
|
||||
skipatstart += pp - skipatstart;
|
||||
|
@ -7443,12 +7443,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
|
|||
break; /* Out of the table scan loop */
|
||||
}
|
||||
}
|
||||
if (i > sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
|
||||
if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
|
||||
}
|
||||
|
||||
/* End of pattern-start options; advance to start of real regex. */
|
||||
|
||||
END_PSO:
|
||||
ptr += skipatstart;
|
||||
|
||||
/* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
|
||||
|
@ -7476,6 +7475,15 @@ if (utf)
|
|||
(errorcode = PRIV(valid_utf)(pattern, -1, erroroffset)) != 0)
|
||||
goto HAD_ERROR;
|
||||
}
|
||||
|
||||
/* Check UCP lockout. */
|
||||
|
||||
if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
|
||||
(PCRE2_UCP|PCRE2_NEVER_UCP))
|
||||
{
|
||||
errorcode = ERR75;
|
||||
goto HAD_ERROR;
|
||||
}
|
||||
|
||||
/* Process the BSR setting. */
|
||||
|
||||
|
|
|
@ -148,15 +148,16 @@ static const char compile_error_texts[] =
|
|||
"different names for subpatterns of the same number are not allowed\0"
|
||||
"(*MARK) must have an argument\0"
|
||||
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||
"\\c must be followed by an ASCII character\0"
|
||||
"\\c must be followed by a printable ASCII character\0"
|
||||
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||
/* 70 */
|
||||
"internal error: unknown opcode in find_fixedlength()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"too many forward references\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"using (*UTF) is disabled by the application\0"
|
||||
"using UTF is disabled by the application\0"
|
||||
/* 75 */
|
||||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
|
@ -223,7 +224,7 @@ static const char match_error_texts[] =
|
|||
"JIT stack limit reached\0"
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown substring\0"
|
||||
"unknown or unset substring\0"
|
||||
/* 50 */
|
||||
"NULL argument passed\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
|
|
|
@ -6782,6 +6782,12 @@ ENDLOOP:
|
|||
release_match_heapframes(&frame_zero, mb);
|
||||
#endif
|
||||
|
||||
/* Fill in fields that are always returned in the match data. */
|
||||
|
||||
match_data->code = re;
|
||||
match_data->subject = subject;
|
||||
match_data->mark = mb->mark;
|
||||
|
||||
/* Handle a fully successful match. */
|
||||
|
||||
if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
||||
|
@ -6841,26 +6847,27 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
|||
match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
|
||||
match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
|
||||
}
|
||||
|
||||
/* Set the remaining returned values */
|
||||
|
||||
/* Fill in the remaining fields that are returned in the match data. */
|
||||
|
||||
match_data->code = re;
|
||||
match_data->subject = subject;
|
||||
match_data->leftchar = mb->start_used_ptr - subject;
|
||||
match_data->rightchar = 0; /* FIXME */
|
||||
match_data->startchar = start_match - subject;
|
||||
match_data->mark = mb->mark;
|
||||
return match_data->rc;
|
||||
}
|
||||
|
||||
/* Control gets here if there has been a partial match, an error, or if the
|
||||
overall match attempt has failed at all permitted starting positions. For
|
||||
anything other than nomatch or partial match, just return the code. */
|
||||
overall match attempt has failed at all permitted starting positions. Any mark
|
||||
data is in the nomatch_mark field. */
|
||||
|
||||
match_data->mark = mb->nomatch_mark;
|
||||
|
||||
/* For anything other than nomatch or partial match, just return the code. */
|
||||
|
||||
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL)
|
||||
match_data->rc = rc;
|
||||
|
||||
/* Handle a partial match. */
|
||||
/* Else handle a partial match. */
|
||||
|
||||
else if (match_partial != NULL)
|
||||
{
|
||||
|
@ -6870,16 +6877,16 @@ else if (match_partial != NULL)
|
|||
match_data->ovector[1] = end_subject - subject;
|
||||
}
|
||||
match_data->leftchar = start_partial - subject;
|
||||
match_data->rightchar = 0; /* FIXME */
|
||||
match_data->startchar = match_partial - subject;
|
||||
match_data->rc = PCRE2_ERROR_PARTIAL;
|
||||
}
|
||||
|
||||
/* This is the classic nomatch case. */
|
||||
/* Else this is the classic nomatch case. */
|
||||
|
||||
else
|
||||
{
|
||||
match_data->rc = PCRE2_ERROR_NOMATCH;
|
||||
match_data->mark = mb->nomatch_mark;
|
||||
}
|
||||
else match_data->rc = PCRE2_ERROR_NOMATCH;
|
||||
|
||||
/* Free any temporary offsets. */
|
||||
|
||||
if (using_temporary_offsets)
|
||||
mb->memctl.free(mb->ovector, mb->memctl.memory_data);
|
||||
|
|
|
@ -119,6 +119,7 @@ size_t left, right;
|
|||
size_t p = 0;
|
||||
PCRE2_SPTR subject = match_data->subject;
|
||||
if (stringnumber >= match_data->oveccount ||
|
||||
stringnumber > match_data->code->top_bracket ||
|
||||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
|
@ -203,6 +204,7 @@ PCRE2_UCHAR *yield;
|
|||
|
||||
PCRE2_SPTR subject = match_data->subject;
|
||||
if (stringnumber >= match_data->oveccount ||
|
||||
stringnumber > match_data->code->top_bracket ||
|
||||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
|
@ -293,6 +295,7 @@ pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
|||
int stringnumber)
|
||||
{
|
||||
if (stringnumber >= match_data->oveccount ||
|
||||
stringnumber > match_data->code->top_bracket ||
|
||||
match_data->ovector[stringnumber*2] == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
return match_data->ovector[stringnumber*2 + 1] -
|
||||
|
|
269
src/pcre2test.c
269
src/pcre2test.c
|
@ -46,7 +46,6 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
. save code and #load
|
||||
. JIT - compile, time, verify
|
||||
. memory handling testing
|
||||
. stackguard testing
|
||||
*/
|
||||
|
||||
|
||||
|
@ -435,7 +434,7 @@ static modstruct modlist[] = {
|
|||
{ "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
|
||||
{ "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
|
||||
{ "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
|
||||
{ "dupnames", MOD_PAT, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
|
||||
{ "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
|
||||
{ "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
|
||||
{ "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
|
||||
{ "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
|
||||
|
@ -612,6 +611,7 @@ clock_t total_compile_time = 0;
|
|||
clock_t total_match_time = 0;
|
||||
|
||||
static uint32_t dfa_matched;
|
||||
static uint32_t forbid_utf = 0;
|
||||
static uint32_t max_oveccount;
|
||||
static uint32_t callout_count;
|
||||
|
||||
|
@ -830,6 +830,14 @@ are supported. */
|
|||
pcre2_set_character_tables_16(G(a,16),b); \
|
||||
else \
|
||||
pcre2_set_character_tables_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_compile_recursion_guard_8(G(a,8),b); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
pcre2_set_compile_recursion_guard_16(G(a,16),b); \
|
||||
else \
|
||||
pcre2_set_compile_recursion_guard_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
|
@ -1102,6 +1110,12 @@ the three different cases. */
|
|||
else \
|
||||
G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b); \
|
||||
else \
|
||||
G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
|
||||
|
@ -1245,8 +1259,10 @@ the three different cases. */
|
|||
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
|
||||
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
|
||||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||
pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c);
|
||||
pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c)
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
|
||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
||||
pcre2_set_compile_recursion_guard_8(G(a,8),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
|
||||
|
@ -1304,12 +1320,14 @@ the three different cases. */
|
|||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||
pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *))b,c);
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
|
||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
||||
pcre2_set_compile_recursion_guard_16(G(a,16),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e);
|
||||
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e);
|
||||
a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
|
||||
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
|
||||
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d) \
|
||||
a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d)
|
||||
|
@ -1361,10 +1379,12 @@ the three different cases. */
|
|||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *))b,c);
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
|
||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
||||
pcre2_set_compile_recursion_guard_32(G(a,32),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e);
|
||||
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
|
||||
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
|
||||
a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
|
||||
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
|
||||
|
@ -1766,6 +1786,25 @@ free(block);
|
|||
#endif /* NO_RECURSE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callback function for stack guard *
|
||||
*************************************************/
|
||||
|
||||
/* This is set up to be called from pcre2_compile() when the stackguard=n
|
||||
modifier sets a value greater than zero. The test we do is whether the
|
||||
parenthesis nesting depth is greater than the value set by the modifier.
|
||||
|
||||
Argument: the current parenthesis nesting depth
|
||||
Returns: non-zero to kill the compilation
|
||||
*/
|
||||
|
||||
static int
|
||||
stack_guard(uint32_t depth)
|
||||
{
|
||||
/* Yields 1 (abort the compile) when depth exceeds pat_patctl.stackguard_test,
the limit set by the stackguard=n modifier; otherwise 0. */
return depth > pat_patctl.stackguard_test;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert UTF-8 character to code point *
|
||||
*************************************************/
|
||||
|
@ -2031,16 +2070,16 @@ return i + 1;
|
|||
|
||||
#ifdef SUPPORT_PCRE16
|
||||
/*************************************************
|
||||
* Convert a string to 16-bit *
|
||||
* Convert pattern to 16-bit *
|
||||
*************************************************/
|
||||
|
||||
/* The input is always interpreted as a string of UTF-8 bytes. If all the input
|
||||
bytes are ASCII, the space needed for a 16-bit string is exactly double the
|
||||
8-bit size. Otherwise, the size needed for a 16-bit string is no more than
|
||||
double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
|
||||
in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
|
||||
result is always left in pbuffer16. Impose a minimum size to save repeated
|
||||
re-sizing.
|
||||
/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
|
||||
all the input bytes are ASCII, the space needed for a 16-bit string is exactly
|
||||
double the 8-bit size. Otherwise, the size needed for a 16-bit string is no
|
||||
more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
|
||||
possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
|
||||
UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
|
||||
repeated re-sizing.
|
||||
|
||||
Note that this function does not object to surrogate values. This is
|
||||
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
|
||||
|
@ -2074,9 +2113,13 @@ if (pbuffer16_size < 2*len + 2)
|
|||
exit(1);
|
||||
}
|
||||
}
|
||||
pp = pbuffer16;
|
||||
|
||||
while (len > 0)
|
||||
pp = pbuffer16;
|
||||
if (!utf)
|
||||
{
|
||||
while (len-- > 0) *pp++ = *p++;
|
||||
}
|
||||
else while (len > 0)
|
||||
{
|
||||
uint32_t c;
|
||||
int chlen = utf82ord(p, &c);
|
||||
|
@ -2102,15 +2145,15 @@ return pp - pbuffer16;
|
|||
|
||||
#ifdef SUPPORT_PCRE32
|
||||
/*************************************************
|
||||
* Convert a string to 32-bit *
|
||||
* Convert pattern to 32-bit *
|
||||
*************************************************/
|
||||
|
||||
/* The input is always interpreted as a string of UTF-8 bytes. If all the input
|
||||
bytes are ASCII, the space needed for a 32-bit string is exactly four times the
|
||||
8-bit size. Otherwise, the size needed for a 32-bit string is no more than four
|
||||
times, because the number of characters must be less than the number of bytes.
|
||||
The result is always left in pbuffer32. Impose a minimum size to save repeated
|
||||
re-sizing.
|
||||
/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
|
||||
all the input bytes are ASCII, the space needed for a 32-bit string is exactly
|
||||
four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no
|
||||
more than four times, because the number of characters cannot exceed the
|
||||
number of bytes. The result is always left in pbuffer32. Impose a minimum size
|
||||
to save repeated re-sizing.
|
||||
|
||||
Note that this function does not object to surrogate values. This is
|
||||
deliberate; it makes it possible to construct UTF-32 strings that are invalid,
|
||||
|
@ -2143,9 +2186,13 @@ if (pbuffer32_size < 4*len + 4)
|
|||
exit(1);
|
||||
}
|
||||
}
|
||||
pp = pbuffer32;
|
||||
|
||||
while (len > 0)
|
||||
pp = pbuffer32;
|
||||
if (!utf)
|
||||
{
|
||||
while (len-- > 0) *pp++ = *p++;
|
||||
}
|
||||
else while (len > 0)
|
||||
{
|
||||
uint32_t c;
|
||||
int chlen = utf82ord(p, &c);
|
||||
|
@ -3020,9 +3067,26 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
|
||||
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
|
||||
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
|
||||
|
||||
/* Remove NEVER_UTF/NEVER_UCP if they were there only because of forbid_utf. This saves
|
||||
cluttering up the verification output of non-UTF test files. */
|
||||
|
||||
if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
|
||||
{
|
||||
compile_options &= ~PCRE2_NEVER_UTF;
|
||||
overall_options &= ~PCRE2_NEVER_UTF;
|
||||
}
|
||||
|
||||
if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
|
||||
{
|
||||
compile_options &= ~PCRE2_NEVER_UCP;
|
||||
overall_options &= ~PCRE2_NEVER_UCP;
|
||||
}
|
||||
|
||||
if ((compile_options|overall_options) == 0)
|
||||
fprintf(outfile, "No options\n");
|
||||
else if (compile_options == overall_options)
|
||||
show_compile_options(compile_options, "Options:", "\n");
|
||||
else
|
||||
{
|
||||
show_compile_options(compile_options, "Compile options:", "\n");
|
||||
|
@ -3035,26 +3099,26 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
|
||||
"any Unicode newline" : "CR, LF, or CRLF");
|
||||
|
||||
switch (newline_convention)
|
||||
if (newline_convention != NEWLINE_DEFAULT) switch (newline_convention)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
fprintf(outfile, "Newline is CR\n");
|
||||
fprintf(outfile, "Forced newline is CR\n");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_LF:
|
||||
fprintf(outfile, "Newline is LF\n");
|
||||
fprintf(outfile, "Forced newline is LF\n");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
fprintf(outfile, "Newline is CRLF\n");
|
||||
fprintf(outfile, "Forced newline is CRLF\n");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
fprintf(outfile, "Newline is CR, LF, or CRLF\n");
|
||||
fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
|
||||
break;
|
||||
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
fprintf(outfile, "Newline is any Unicode newline\n");
|
||||
fprintf(outfile, "Forced newline is any Unicode newline\n");
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -3063,7 +3127,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
|
||||
if (first_ctype == 2)
|
||||
{
|
||||
fprintf(outfile, "First char at start or follows newline\n");
|
||||
fprintf(outfile, "First code unit at start or follows newline\n");
|
||||
}
|
||||
else if (first_ctype == 1)
|
||||
{
|
||||
|
@ -3079,35 +3143,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
fprintf(outfile, "%s\n", caseless);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "No first code unit\n");
|
||||
}
|
||||
|
||||
if (last_ctype == 0)
|
||||
{
|
||||
fprintf(outfile, "No last code unit\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *caseless =
|
||||
((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
|
||||
"" : " (caseless)";
|
||||
if (PRINTOK(last_cunit))
|
||||
fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "Last code unit = ");
|
||||
pchar(last_cunit, FALSE, outfile);
|
||||
fprintf(outfile, "%s\n", caseless);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(outfile, "Subject length lower bound = %d\n", minlength);
|
||||
|
||||
if (start_bits == NULL)
|
||||
fprintf(outfile, "No starting code unit list\n");
|
||||
else
|
||||
else if (start_bits != NULL)
|
||||
{
|
||||
int i;
|
||||
int c = 24;
|
||||
|
@ -3135,6 +3171,31 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
}
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "No first code unit\n");
|
||||
}
|
||||
|
||||
if (last_ctype == 0)
|
||||
{
|
||||
fprintf(outfile, "No last code unit\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *caseless =
|
||||
((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
|
||||
"" : " (caseless)";
|
||||
if (PRINTOK(last_cunit))
|
||||
fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "Last code unit = ");
|
||||
pchar(last_cunit, FALSE, outfile);
|
||||
fprintf(outfile, "%s\n", caseless);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(outfile, "Subject length lower bound = %d\n", minlength);
|
||||
|
||||
/* FIXME: tidy this up */
|
||||
|
||||
|
@ -3183,7 +3244,11 @@ if (restrict_for_perl_test)
|
|||
return PR_ABEND;
|
||||
}
|
||||
|
||||
if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
|
||||
if (strncmp((char *)buffer, "#forbid_utf", 11) == 0 && isspace(buffer[11]))
|
||||
{
|
||||
forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
|
||||
}
|
||||
else if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
|
||||
{
|
||||
(void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL);
|
||||
}
|
||||
|
@ -3491,6 +3556,13 @@ else switch (pat_patctl.tables_id)
|
|||
|
||||
PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
|
||||
|
||||
/* Set up for the stackguard test. */
|
||||
|
||||
if (pat_patctl.stackguard_test != 0)
|
||||
{
|
||||
PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard);
|
||||
}
|
||||
|
||||
/* Handle compiling via the POSIX interface, which doesn't support the
|
||||
timing, showing, or debugging options, nor the ability to pass over
|
||||
local character tables. Neither does it have 16-bit or 32-bit support. */
|
||||
|
@ -3604,7 +3676,7 @@ if (timeit > 0)
|
|||
for (i = 0; i < timeit; i++)
|
||||
{
|
||||
PCRE2_COMPILE(compiled_code, pbuffer, patlen,
|
||||
pat_patctl.options, &errorcode, &erroroffset, pat_context);
|
||||
pat_patctl.options|forbid_utf, &errorcode, &erroroffset, pat_context);
|
||||
if (TEST(compiled_code, !=, NULL))
|
||||
{ SUB1(pcre2_code_free, compiled_code); }
|
||||
}
|
||||
|
@ -3618,8 +3690,8 @@ if (timeit > 0)
|
|||
|
||||
/* A final compile that is used "for real". */
|
||||
|
||||
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options, &errorcode,
|
||||
&erroroffset, pat_context);
|
||||
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
|
||||
&errorcode, &erroroffset, pat_context);
|
||||
|
||||
/* Compilation failed; go back for another re, skipping to blank line
|
||||
if non-interactive. */
|
||||
|
@ -3782,15 +3854,13 @@ for (;;)
|
|||
min = mid;
|
||||
mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
|
||||
}
|
||||
|
||||
else if (capcount >= 0 ||
|
||||
capcount == PCRE2_ERROR_NOMATCH ||
|
||||
capcount == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
if (mid == min + 1)
|
||||
{
|
||||
if (capcount != PCRE2_ERROR_NOMATCH)
|
||||
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
|
||||
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
|
||||
break;
|
||||
}
|
||||
max = mid;
|
||||
|
@ -4184,8 +4254,11 @@ while ((c = *p++) != 0)
|
|||
continue;
|
||||
|
||||
default:
|
||||
fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
|
||||
return PR_OK;
|
||||
if (isalnum(c))
|
||||
{
|
||||
fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
|
||||
return PR_OK;
|
||||
}
|
||||
}
|
||||
|
||||
/* We now have a character value in c that may be greater than 255.
|
||||
|
@ -4608,7 +4681,12 @@ for (gmatched = 0;; gmatched++)
|
|||
PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer,
|
||||
sizeof(copybuffer)/code_unit_size);
|
||||
if (rc < 0)
|
||||
fprintf(outfile, "copy substring %d failed %d\n", n, rc);
|
||||
{
|
||||
fprintf(outfile, "copy substring %d failed (%d): ", n, rc);
|
||||
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
|
||||
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "%2dC ", n);
|
||||
|
@ -4641,7 +4719,10 @@ for (gmatched = 0;; gmatched++)
|
|||
copybuffer, sizeof(copybuffer)/code_unit_size);
|
||||
if (rc < 0)
|
||||
{
|
||||
fprintf(outfile, "copy substring '%s' failed %d\n", nptr, rc);
|
||||
fprintf(outfile, "copy substring '%s' failed (%d): ", nptr, rc);
|
||||
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
|
||||
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -4661,7 +4742,12 @@ for (gmatched = 0;; gmatched++)
|
|||
uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
|
||||
PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer);
|
||||
if (rc < 0)
|
||||
fprintf(outfile, "get substring %d failed %d\n", n, rc);
|
||||
{
|
||||
fprintf(outfile, "get substring %d failed (%d): ", n, rc);
|
||||
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
|
||||
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "%2dG ", n);
|
||||
|
@ -4694,7 +4780,10 @@ for (gmatched = 0;; gmatched++)
|
|||
PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer);
|
||||
if (rc < 0)
|
||||
{
|
||||
fprintf(outfile, "get substring '%s' failed %d\n", nptr, rc);
|
||||
fprintf(outfile, "get substring '%s' failed (%d): ", nptr, rc);
|
||||
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
|
||||
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -4715,7 +4804,12 @@ for (gmatched = 0;; gmatched++)
|
|||
size_t *lengths;
|
||||
PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
|
||||
if (rc < 0)
|
||||
fprintf(outfile, "get substring list failed %d\n", rc);
|
||||
{
|
||||
fprintf(outfile, "get substring list failed (%d): ", rc);
|
||||
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
|
||||
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < capcount; i++)
|
||||
|
@ -4737,7 +4831,6 @@ for (gmatched = 0;; gmatched++)
|
|||
else if (capcount == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
PCRE2_OFFSET leftchar = FLD(match_data, leftchar);
|
||||
|
||||
fprintf(outfile, "Partial match");
|
||||
if (leftchar != FLD(match_data, startchar))
|
||||
fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar));
|
||||
|
@ -4880,8 +4973,8 @@ for (gmatched = 0;; gmatched++)
|
|||
else
|
||||
{
|
||||
pp += end_offset * code_unit_size;
|
||||
len -= end_offset;
|
||||
ulen -= end_offset *code_unit_size;
|
||||
len -= end_offset * code_unit_size;
|
||||
ulen -= end_offset;
|
||||
}
|
||||
}
|
||||
} /* End of global loop */
|
||||
|
@ -4894,7 +4987,7 @@ return PR_OK;
|
|||
|
||||
|
||||
/*************************************************
|
||||
* Print PCRE version *
|
||||
* Print PCRE2 version *
|
||||
*************************************************/
|
||||
|
||||
/* The version string was read into 'version' at the start of execution. */
|
||||
|
@ -4903,7 +4996,7 @@ static void
|
|||
print_version(FILE *f)
|
||||
{
|
||||
VERSION_TYPE *vp;
|
||||
fprintf(f, "PCRE version ");
|
||||
fprintf(f, "PCRE2 version ");
|
||||
for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
@ -4976,6 +5069,7 @@ printf(" -d set default pattern control 'debug'\n");
|
|||
printf(" -dfa set default subject control 'dfa'\n");
|
||||
printf(" -help show usage information\n");
|
||||
printf(" -i set default pattern control 'info'\n");
|
||||
printf(" -jit set default pattern control 'jit'\n");
|
||||
printf(" -q quiet: do not output PCRE version number at start\n");
|
||||
printf(" -pattern <s> set default pattern control fields\n");
|
||||
printf(" -subject <s> set default subject control fields\n");
|
||||
|
@ -5261,10 +5355,18 @@ while (argc > 1 && argv[op][0] == '-')
|
|||
|
||||
/* Set some common pattern and subject controls */
|
||||
|
||||
else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
|
||||
else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
|
||||
else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
|
||||
else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
|
||||
else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
|
||||
else if (strcmp(arg, "-jit") == 0)
|
||||
{
|
||||
def_patctl.jit = 7; /* full & partial */
|
||||
#ifndef SUPPORT_JIT
|
||||
fprintf(stderr, "** Warning: JIT support is not available: "
|
||||
"-jit calls dummy functions.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Set timing parameters */
|
||||
|
||||
|
@ -5500,10 +5602,11 @@ while (notdone)
|
|||
|
||||
else
|
||||
{
|
||||
while (isspace(*p)) p++;
|
||||
while (isspace(*p)) p++;
|
||||
if (*p != 0)
|
||||
{
|
||||
fprintf(stderr, "** Invalid pattern delimiter '%c'.\n", *buffer);
|
||||
fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
|
||||
*buffer);
|
||||
rc = PR_SKIP;
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue