From 8aa511a98b7a426d8bdb74bd267d3b15565ea227 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 31 Oct 2014 12:34:34 +0000 Subject: [PATCH] Improve test coverage; minor typo in JIT test and other minor buglets fixed. --- Makefile.am | 5 +- RunGrepTest | 24 ++ RunTest | 572 +++++++++++++------------ doc/pcre2_substring_number_from_name.3 | 11 +- doc/pcre2api.3 | 9 +- doc/pcre2test.1 | 54 ++- src/pcre2.h.in | 47 +- src/pcre2_compile.c | 4 +- src/pcre2_error.c | 3 +- src/pcre2_jit_test.c | 2 +- src/pcre2_substring.c | 6 +- src/pcre2test.c | 511 +++++++++++++++------- testdata/grepoutput | 8 +- testdata/testinput17 | 12 + testdata/testinput2 | 16 +- testdata/testinput5 | 5 +- testdata/testoutput14 | 14 +- testdata/testoutput17 | 17 + testdata/testoutput2 | 128 +++--- testdata/testoutput5 | 7 +- testdata/testoutput6 | 4 +- 21 files changed, 905 insertions(+), 554 deletions(-) diff --git a/Makefile.am b/Makefile.am index 7839929..71798e0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -587,11 +587,12 @@ EXTRA_DIST += \ testdata/testoutput18 \ testdata/testoutputEBC \ perltest.sh - + # RunTest and RunGrepTest should clean up after themselves, but just in case -# they don't, add their working files to CLEANFILES. +# they don't, add their working files to CLEANFILES. CLEANFILES += \ + testSinput \ test3input \ test3output \ test3outputA \ diff --git a/RunGrepTest b/RunGrepTest index b5d8a09..719311f 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -72,6 +72,19 @@ fi ./pcre2test -C unicode >/dev/null utf8=$? +# ------ Function to run and check a special pcre2grep arguments test ------- + +checkspecial() + { + $valgrind ./pcre2grep $1 >>testtrygrep 2>&1 + if [ $? -ne $2 ] ; then + echo "** pcre2grep $1 failed - check testtrygrep" + exit 1 + fi + } + +# ------ Normal tests ------ + echo "Testing pcre2grep main features" echo "---------------------------- Test 1 ------------------------------" >testtrygrep @@ -571,6 +584,17 @@ $valgrind $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >> $cf $srcdir/testdata/grepoutputN testtrygrep if [ $? != 0 ] ; then exit 1; fi + +# Finally, some tests to exercise code that is not tested above, just to be +# sure that it runs OK. Doing this improves the coverage statistics. The output +# is not checked. + +echo "Testing miscellaneous pcre2grep arguments (unchecked)" +echo '' >testtrygrep +checkspecial '-xxxxx' 2 +checkspecial '--help' 0 +checkspecial '--line-buffered --colour=auto abc /dev/null' 1 + # Clean up local working files rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep diff --git a/RunTest b/RunTest index 3f2be40..0a39d60 100755 --- a/RunTest +++ b/RunTest @@ -20,14 +20,9 @@ # except test 10. Whatever order the arguments are in, the tests are always run # in numerical order. # -# The special argument "3S" runs test 3, stopping if it fails. Test 3 is the -# locale test, and failure usually means there's an issue with the locale -# rather than a bug in PCRE2, so normally subsequent tests are run. "3S" is -# useful when you want to debug or update the test. -# -# Inappropriate tests are automatically skipped (with a comment to say so): for -# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT -# support is compiled, test 13 is skipped. +# Inappropriate tests are automatically skipped (with a comment to say so). For +# example, if JIT support is not compiled, test 16 is skipped, whereas if JIT +# support is compiled, test 15 is skipped. # # Other arguments can be one of the words "valgrind", "valgrind-log", or "sim" # followed by an argument to run cross-compiled executables under a simulator, @@ -48,6 +43,7 @@ # Define test titles in variables so that they can be output as a list. Some # of them are modified (e.g. with -8 or -16) when used in the actual tests. +title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)" title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)" title2="Test 2: API, errors, internals, and non-Perl stuff" title3="Test 3: Locale-specific features" @@ -69,10 +65,10 @@ title15="Test 15: JIT-specific features when JIT is not available" title16="Test 16: JIT-specific features when JIT is available" title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP" title18="Test 18: Tests of the POSIX interface with UTF/UCP" - maxtest=18 if [ $# -eq 1 -a "$1" = "list" ]; then + echo $title0 echo $title1 echo $title2 "(not UTF or UCP)" echo $title3 @@ -145,10 +141,22 @@ checkresult() } +# ------ Function to run and check a special pcre2test arguments test ------- + +checkspecial() + { + $valgrind ./pcre2test $1 >>testtry + if [ $? -ne 0 ] ; then + echo "** pcre2test $1 failed - check testtry" + exit 1 + fi + } + + # ------ Special EBCDIC Test ------- if [ $# -eq 1 -a "$1" = "ebcdic" ]; then - ./pcre2test -C ebcdic >/dev/null + $valgrind ./pcre2test -C ebcdic >/dev/null ebcdic=$? if [ $ebcdic -ne 1 ] ; then echo "Cannot run EBCDIC tests: EBCDIC support not compiled" @@ -180,6 +188,7 @@ unset cp ls mv rm # Process options and select which tests to run; for those that are explicitly # requested, check that the necessary optional facilities are available. +do0=no do1=no do2=no do3=no @@ -201,6 +210,7 @@ do18=no while [ $# -gt 0 ] ; do case $1 in + 0) do0=yes;; 1) do1=yes;; 2) do2=yes;; 3) do3=yes;; @@ -238,7 +248,7 @@ while [ $# -gt 0 ] ; do tf=`expr "$1" : '\([0-9]*\)'` tt=`expr "$1" : '.*-\([0-9]*\)'` if [ "$tt" = "" ] ; then tt=$maxtest; fi - if expr \( "$tf" "<" 1 \) \| \( "$tt" ">" "$maxtest" \) >/dev/null; then + if expr \( "$tt" ">" "$maxtest" \) >/dev/null; then echo "Invalid test range '$1'"; exit 1 fi while expr "$tf" "<=" "$tt" >/dev/null; do @@ -287,7 +297,7 @@ test32=skip if [ "$arg8$arg16$arg32" = "" ] ; then if [ $support8 -ne 0 ] ; then - test8= + test8=-8 fi if [ $support16 -ne 0 ] ; then test16=-16 @@ -296,7 +306,7 @@ if [ "$arg8$arg16$arg32" = "" ] ; then test32=-32 fi -# Select requested bit sizes +# Otherwise, select requested bit sizes else if [ "$arg8" = yes ] ; then @@ -304,7 +314,7 @@ else echo "Cannot run 8-bit library tests: 8-bit library not compiled" exit 1 fi - test8= + test8=-8 fi if [ "$arg16" = yes ] ; then if [ $support16 -eq 0 ] ; then @@ -339,12 +349,13 @@ fi # If no specific tests were requested, select all. Those that are not # relevant will be automatically skipped. -if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \ - $do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \ - $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \ - $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \ - $do17 = no -a $do18 = no \ +if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ + $do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \ + $do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \ + $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \ + $do16 = no -a $do17 = no -a $do18 = no \ ]; then + do0=yes do1=yes do2=yes do3=yes @@ -384,310 +395,325 @@ for bmode in "$test8" "$test16" "$test32"; do bits=16; echo "---- Testing 16-bit library ----"; echo "";; -32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi bits=32; echo "---- Testing 32-bit library ----"; echo "";; - *) bits=8; echo "---- Testing 8-bit library ----"; echo "";; + -8) bits=8; echo "---- Testing 8-bit library ----"; echo "";; esac -# Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8 + # Test 0 is a special test. Its output is not checked, because it will + # be different on different hardware and with different configurations. + # Running this test just exercises the code. -if [ $do1 = yes ] ; then - echo $title1 - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry - checkresult $? 1 "$opt" - done -fi - -# PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals - -if [ $do2 = yes ] ; then - echo $title2 "(excluding UTF-$bits)" - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry - if [ $? = 0 ] ; then - checkresult $? 2 "$opt" - else - echo " " - echo "** Test 2 requires a lot of stack. If it has crashed with a" - echo "** segmentation fault, it may be that you do not have enough" - echo "** stack available by default. Please see the 'pcre2stack' man" - echo "** page for a discussion of PCRE2's stack usage." - echo " " - exit 1 - fi - done -fi - -# Locale-specific tests, provided that either the "fr_FR" or the "french" -# locale is available. The former is the Unix-like standard; the latter is -# for Windows. Another possibility is "fr". Unfortunately, different versions -# of the French locale give different outputs for some items. This test passes -# if the output matches any one of the alternative output files. - -if [ $do3 = yes ] ; then - locale -a | grep '^fr_FR$' >/dev/null - if [ $? -eq 0 ] ; then - locale=fr_FR - infile=$testdata/testinput3 - outfile=$testdata/testoutput3 - outfile2=$testdata/testoutput3A - outfile3=$testdata/testoutput3B - else - infile=test3input - outfile=test3output - outfile2=test3outputA - outfile3=test3outputB - locale -a | grep '^french$' >/dev/null - if [ $? -eq 0 ] ; then - locale=french - sed 's/fr_FR/french/' $testdata/testinput3 >test3input - sed 's/fr_FR/french/' $testdata/testoutput3 >test3output - sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA - sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB - else - locale -a | grep '^fr$' >/dev/null - if [ $? -eq 0 ] ; then - locale=fr - sed 's/fr_FR/fr/' $testdata/intestinput3 >test3input - sed 's/fr_FR/fr/' $testdata/intestoutput3 >test3output - sed 's/fr_FR/fr/' $testdata/intestoutput3A >test3outputA - sed 's/fr_FR/fr/' $testdata/intestoutput3B >test3outputB - else - locale= - fi - fi + if [ $do0 = yes ] ; then + echo $title0 + echo '/abc/jit,memory' >testSinput + echo ' abc' >>testSinput + echo '' >testtry + checkspecial '-C' + checkspecial '--help' + checkspecial '-S 1 -t 10 testSinput' + echo " OK" fi - if [ "$locale" != "" ] ; then - echo $title3 "(using '$locale' locale)" + # Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8 + + if [ $do1 = yes ] ; then + echo $title1 for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry + checkresult $? 1 "$opt" + done + fi + + # PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals + + if [ $do2 = yes ] ; then + echo $title2 "(excluding UTF-$bits)" + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry if [ $? = 0 ] ; then - case "$opt" in - -jit) with=" with JIT";; - *) with="";; - esac - if $cf $outfile testtry >teststdout || \ - $cf $outfile2 testtry >teststdout || \ - $cf $outfile3 testtry >teststdout - then - echo " OK$with" - else - echo "** Locale test did not run successfully$with. The output did not match" - echo " $outfile, $outfile2 or $outfile3." - echo " This may mean that there is a problem with the locale settings rather" - echo " than a bug in PCRE2." - exit 1 - fi - else exit 1 + checkresult $? 2 "$opt" + else + echo " " + echo "** Test 2 requires a lot of stack. If it has crashed with a" + echo "** segmentation fault, it may be that you do not have enough" + echo "** stack available by default. Please see the 'pcre2stack' man" + echo "** page for a discussion of PCRE2's stack usage." + echo " " + exit 1 fi done - else - echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr' or" - echo "'french' locales exist, or the \"locale\" command is not available" - echo "to check for them." - echo " " fi -fi -# Tests for UTF and Unicode property support + # Locale-specific tests, provided that either the "fr_FR" or the "french" + # locale is available. The former is the Unix-like standard; the latter is + # for Windows. Another possibility is "fr". Unfortunately, different versions + # of the French locale give different outputs for some items. This test passes + # if the output matches any one of the alternative output files. -if [ $do4 = yes ] ; then - echo ${title4A}-${bits}${title4B} - if [ $utf -eq 0 ] ; then - echo " Skipped because UTF-$bits support is not available" - else - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry - checkresult $? 4 "$opt" - done + if [ $do3 = yes ] ; then + locale -a | grep '^fr_FR$' >/dev/null + if [ $? -eq 0 ] ; then + locale=fr_FR + infile=$testdata/testinput3 + outfile=$testdata/testoutput3 + outfile2=$testdata/testoutput3A + outfile3=$testdata/testoutput3B + else + infile=test3input + outfile=test3output + outfile2=test3outputA + outfile3=test3outputB + locale -a | grep '^french$' >/dev/null + if [ $? -eq 0 ] ; then + locale=french + sed 's/fr_FR/french/' $testdata/testinput3 >test3input + sed 's/fr_FR/french/' $testdata/testoutput3 >test3output + sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA + sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB + else + locale -a | grep '^fr$' >/dev/null + if [ $? -eq 0 ] ; then + locale=fr + sed 's/fr_FR/fr/' $testdata/intestinput3 >test3input + sed 's/fr_FR/fr/' $testdata/intestoutput3 >test3output + sed 's/fr_FR/fr/' $testdata/intestoutput3A >test3outputA + sed 's/fr_FR/fr/' $testdata/intestoutput3B >test3outputB + else + locale= + fi + fi + fi + + if [ "$locale" != "" ] ; then + echo $title3 "(using '$locale' locale)" + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry + if [ $? = 0 ] ; then + case "$opt" in + -jit) with=" with JIT";; + *) with="";; + esac + if $cf $outfile testtry >teststdout || \ + $cf $outfile2 testtry >teststdout || \ + $cf $outfile3 testtry >teststdout + then + echo " OK$with" + else + echo "** Locale test did not run successfully$with. The output did not match" + echo " $outfile, $outfile2 or $outfile3." + echo " This may mean that there is a problem with the locale settings rather" + echo " than a bug in PCRE2." + exit 1 + fi + else exit 1 + fi + done + else + echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr' or" + echo "'french' locales exist, or the \"locale\" command is not available" + echo "to check for them." + echo " " + fi fi -fi -if [ $do5 = yes ] ; then - echo ${title5A}-${bits}$title5B - if [ $utf -eq 0 ] ; then - echo " Skipped because UTF-$bits support is not available" - else - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry - checkresult $? 5 "$opt" - done + # Tests for UTF and Unicode property support + + if [ $do4 = yes ] ; then + echo ${title4A}-${bits}${title4B} + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry + checkresult $? 4 "$opt" + done + fi fi -fi -# Tests for DFA matching support - -if [ $do6 = yes ] ; then - echo $title6 - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry - checkresult $? 6 "" -fi - -if [ $do7 = yes ] ; then - echo ${title7A}-${bits}$title7B - if [ $utf -eq 0 ] ; then - echo " Skipped because UTF-$bits support is not available" - else - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry - checkresult $? 7 "" + if [ $do5 = yes ] ; then + echo ${title5A}-${bits}$title5B + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry + checkresult $? 5 "$opt" + done + fi fi -fi -# Test of internal offsets and code sizes. This test is run only when there -# is UTF/UCP support and the link size is 2. The actual tests are -# mostly the same as in some of the above, but in this test we inspect some -# offsets and sizes that require a known link size. This is a doublecheck for -# the maintainer, just in case something changes unexpectely. The output from -# this test is different in 8-bit, 16-bit, and 32-bit modes, so there are -# mode-specific output files. + # Tests for DFA matching support -if [ $do8 = yes ] ; then - echo $title8 - if [ $link_size -ne 2 ] ; then - echo " Skipped because link size is not 2" - elif [ $utf -eq 0 ] ; then - echo " Skipped because UTF-$bits support is not available" - else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry - checkresult $? 8-$bits "" + if [ $do6 = yes ] ; then + echo $title6 + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry + checkresult $? 6 "" fi -fi -# Tests for 8-bit-specific features - -if [ "$do9" = yes ] ; then - echo $title9 - if [ "$bits" = "16" -o "$bits" = "32" ] ; then - echo " Skipped when running 16/32-bit tests" - else - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry - checkresult $? 9 "$opt" - done + if [ $do7 = yes ] ; then + echo ${title7A}-${bits}$title7B + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry + checkresult $? 7 "" + fi fi -fi -# Tests for UTF-8 and UCP 8-bit-specific features + # Test of internal offsets and code sizes. This test is run only when there + # is UTF/UCP support and the link size is 2. The actual tests are + # mostly the same as in some of the above, but in this test we inspect some + # offsets and sizes that require a known link size. This is a doublecheck for + # the maintainer, just in case something changes unexpectely. The output from + # this test is different in 8-bit, 16-bit, and 32-bit modes, so there are + # mode-specific output files. -if [ "$do10" = yes ] ; then - echo $title10 - if [ "$bits" = "16" -o "$bits" = "32" ] ; then - echo " Skipped when running 16/32-bit tests" - elif [ $utf -eq 0 ] ; then - echo " Skipped because UTF-$bits support is not available" - else - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry - checkresult $? 10 "$opt" - done + if [ $do8 = yes ] ; then + echo $title8 + if [ $link_size -ne 2 ] ; then + echo " Skipped because link size is not 2" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry + checkresult $? 8-$bits "" + fi fi -fi -# Tests for 16-bit and 32-bit features. Output is different for the two widths. + # Tests for 8-bit-specific features -if [ $do11 = yes ] ; then - echo $title11 - if [ "$bits" = "8" ] ; then - echo " Skipped when running 8-bit tests" - else - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry - checkresult $? 11-$bits "$opt" - done + if [ "$do9" = yes ] ; then + echo $title9 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + else + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry + checkresult $? 9 "$opt" + done + fi fi -fi -# Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output -# is different for the two widths. + # Tests for UTF-8 and UCP 8-bit-specific features -if [ $do12 = yes ] ; then - echo $title12 - if [ "$bits" = "8" ] ; then - echo " Skipped when running 8-bit tests" - elif [ $utf -eq 0 ] ; then - echo " Skipped because UTF-$bits support is not available" - else - for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry - checkresult $? 12-$bits "$opt" - done + if [ "$do10" = yes ] ; then + echo $title10 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry + checkresult $? 10 "$opt" + done + fi fi -fi -# Tests for 16/32-bit-specific features in DFA non-UTF modes + # Tests for 16-bit and 32-bit features. Output is different for the two widths. -if [ $do13 = yes ] ; then - echo $title13 - if [ "$bits" = "8" ] ; then - echo " Skipped when running 8-bit tests" - else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry - checkresult $? 13 "" + if [ $do11 = yes ] ; then + echo $title11 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + else + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry + checkresult $? 11-$bits "$opt" + done + fi fi -fi -# Test non-JIT match and recursion limits + # Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output + # is different for the two widths. -if [ $do14 = yes ] ; then - echo $title14 - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry - checkresult $? 14 "" -fi - -# Test JIT-specific features when JIT is not available - -if [ $do15 = yes ] ; then - echo $title15 - if [ $jit -ne 0 ] ; then - echo " Skipped because JIT is available" - else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry - checkresult $? 15 "" + if [ $do12 = yes ] ; then + echo $title12 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry + checkresult $? 12-$bits "$opt" + done + fi fi -fi -# Test JIT-specific features when JIT is available + # Tests for 16/32-bit-specific features in DFA non-UTF modes -if [ $do16 = yes ] ; then - echo $title16 - if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then - echo " Skipped because JIT is not available or not usable" - else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry - checkresult $? 16 "" + if [ $do13 = yes ] ; then + echo $title13 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry + checkresult $? 13 "" + fi fi -fi -# Tests for the POSIX interface without UTF/UCP (8-bit only) + # Test non-JIT match and recursion limits -if [ $do17 = yes ] ; then - echo $title17 - if [ "$bits" = "16" -o "$bits" = "32" ] ; then - echo " Skipped when running 16/32-bit tests" - else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry - checkresult $? 17 "" + if [ $do14 = yes ] ; then + echo $title14 + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry + checkresult $? 14 "" fi -fi -# Tests for the POSIX interface with UTF/UCP (8-bit only) + # Test JIT-specific features when JIT is not available -if [ $do18 = yes ] ; then - echo $title18 - if [ "$bits" = "16" -o "$bits" = "32" ] ; then - echo " Skipped when running 16/32-bit tests" - elif [ $utf -eq 0 ] ; then - echo " Skipped because UTF-$bits support is not available" - else - $sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry - checkresult $? 18 "" + if [ $do15 = yes ] ; then + echo $title15 + if [ $jit -ne 0 ] ; then + echo " Skipped because JIT is available" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry + checkresult $? 15 "" + fi + fi + + # Test JIT-specific features when JIT is available + + if [ $do16 = yes ] ; then + echo $title16 + if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then + echo " Skipped because JIT is not available or not usable" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry + checkresult $? 16 "" + fi + fi + + # Tests for the POSIX interface without UTF/UCP (8-bit only) + + if [ $do17 = yes ] ; then + echo $title17 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry + checkresult $? 17 "" + fi + fi + + # Tests for the POSIX interface with UTF/UCP (8-bit only) + + if [ $do18 = yes ] ; then + echo $title18 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry + checkresult $? 18 "" + fi fi -fi # End of loop for 8/16/32-bit tests done # Clean up local working files -rm -f test3input test3output test3outputA test3outputB teststdout testtry +rm -f testSinput test3input test3output test3outputA test3outputB teststdout testtry # End diff --git a/doc/pcre2_substring_number_from_name.3 b/doc/pcre2_substring_number_from_name.3 index d6588bc..12a9d3d 100644 --- a/doc/pcre2_substring_number_from_name.3 +++ b/doc/pcre2_substring_number_from_name.3 @@ -15,16 +15,17 @@ PCRE2 - Perl-compatible regular expressions (revised API) .rs .sp This convenience function finds the number of a named substring capturing -parenthesis in a compiled pattern. Its arguments are: +parenthesis in a compiled pattern, provided that it is a unique name. The +function arguments are: .sp \fIcode\fP Compiled regular expression \fIname\fP Name whose number is required .sp The yield of the function is the number of the parenthesis if the name is -found, or PCRE2_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed -(PCRE2_DUPNAMES is set), it is not defined which of the numbers is returned. -You can obtain the complete list by calling -\fBpcre2_substring_nametable_scan()\fP. +found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are +allowed (PCRE2_DUPNAMES is set), if the name is not unique, +PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers +with the same name by calling \fBpcre2_substring_nametable_scan()\fP. .P There is a complete description of the PCRE2 native API in the .\" HREF diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 16bbf0c..8c3e39d 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "25 October 2014" "PCRE2 10.00" +.TH PCRE2API 3 "29 October 2014" "PCRE2 10.00" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -2332,8 +2332,9 @@ the number of the subpattern called "xxx" is 2. If the name is known to be unique (PCRE2_DUPNAMES was not set), you can find the number from the name by calling \fBpcre2_substring_number_from_name()\fP. The first argument is the compiled pattern, and the second is the name. The yield of the function is the -subpattern number, or PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that -name. +subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that +name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of +that name. .P Given the number, you can extract the substring directly, or use one of the functions described in the previous section. For convenience, there are also @@ -2630,6 +2631,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 25 October 2014 +Last updated: 29 October 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 6b220ae..0f9bdef 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "11 October 2014" "PCRE 10.00" +.TH PCRE2TEST 1 "31 October 2014" "PCRE 10.00" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -383,6 +383,7 @@ which may also be used in a \fB#pattern\fP command. A pattern's modifier list can add to or override default modifiers that were set by a previous \fB#pattern\fP command. . +. .SS "Setting compilation options" .rs .sp @@ -420,6 +421,7 @@ non-printing characters in output strings to be printed using the \ex{hh...} notation. Otherwise, those less than 0x100 are output in hex without the curly brackets. . +. .SS "Setting compilation controls" .rs .sp @@ -566,6 +568,9 @@ also output. .sp The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested parentheses in a pattern. Breaching the limit causes a compilation error. +The default for the library is set when PCRE2 is built, but \fBpcre2test\fP +sets its own default of 220, which is required for running the standard test +suite. . . .SS "Using the POSIX wrapper API" @@ -631,12 +636,13 @@ However, they may be included in a pattern's modifier list, in which case they are applied to every subject line that is processed with that pattern. They do not affect the compilation process. .sp - aftertext show text after match - allaftertext show text after captures - allcaptures show all captures - allusedtext show all consulted text - /g global global matching - mark show mark values + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allusedtext show all consulted text + /g global global matching + mark show mark values + startchar show starting character when relevant .sp These modifiers may not appear in a \fB#pattern\fP command. If you want them as defaults, set them in a \fB#subject\fP command. @@ -710,6 +716,7 @@ pattern. offset= set starting offset ovector= set size of output vector recursion_limit= set a recursion limit + startchar show startchar when relevant .sp The effects of these modifiers are described in the following sections. FIXME: Give more examples. @@ -735,13 +742,28 @@ there is a lookbehind at the start of a match, or a lookahead at the end, or if of the actual match are indicated in the output by '<' or '>' characters underneath them. Here is an example: .sp - /(?<=pqr)abc(?=xyz)/ - 123pqrabcxyz456\e=allusedtext + re> /(?<=pqr)abc(?=xyz)/ + data> 123pqrabcxyz456\e=allusedtext 0: pqrabcxyz <<< >>> .sp This shows that the matched string is "abc", with the preceding and following strings "pqr" and "xyz" also consulted during the match. +.P +The \fBstartchar\fP modifier requests that the starting character for the match +be indicated, if it is different to the start of the matched string. The only +time when this occurs is when \eK has been processed as part of the match. In +this situation, the output for the matched string is displayed from the +starting character instead of from the match point, with circumflex characters +under the earlier characters. For example: +.sp + re> /abc\eKxyz/ + data> abcxyz\e=startchar + 0: abcxyz + ^^^ +.sp +Unlike \fBallusedtext\fP, the \fBstartchar\fP modifier can be used with JIT. +However, these two modifiers are mutually exclusive. . . .SS "Showing the value of all capture groups" @@ -890,11 +912,13 @@ appears, though of course it can also be used to set a default in a \fB#subject\fP command. It specifies the number of pairs of offsets that are available for storing matching information. The default is 15. .P -At least one pair of offsets is always created by -\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a -value of 0 is the same as 1. However a value of 0 is useful when testing the -POSIX API because it causes \fBregexec()\fP to be called with a NULL capture -vector. +A value of zero is useful when testing the POSIX API because it causes +\fBregexec()\fP to be called with a NULL capture vector. When not testing the +POSIX API, a value of zero is used to cause +\fBpcre2_match_data_create_from_pattern\fP to be called, in order to create a +match block of exactly the right size for the pattern. (It is not possible to +create a match block with a zero-length ovector; there is always one pair of +offsets.) . . .SH "THE ALTERNATIVE MATCHING FUNCTION" @@ -1159,6 +1183,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 11 October 2014 +Last updated: 31 October 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/src/pcre2.h.in b/src/pcre2.h.in index e8bc302..bbbf469 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -196,29 +196,30 @@ greater than zero. */ /* Error codes for pcre2[_dfa]_match(), substring extraction functions, and context functions. */ -#define PCRE2_ERROR_BADDATA (-29) -#define PCRE2_ERROR_BADLENGTH (-30) -#define PCRE2_ERROR_BADMAGIC (-31) -#define PCRE2_ERROR_BADMODE (-32) -#define PCRE2_ERROR_BADOFFSET (-33) -#define PCRE2_ERROR_BADOPTION (-34) -#define PCRE2_ERROR_BADUTFOFFSET (-35) -#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */ -#define PCRE2_ERROR_DFA_BADRESTART (-37) -#define PCRE2_ERROR_DFA_RECURSE (-38) -#define PCRE2_ERROR_DFA_UCOND (-39) -#define PCRE2_ERROR_DFA_UITEM (-40) -#define PCRE2_ERROR_DFA_WSSIZE (-41) -#define PCRE2_ERROR_INTERNAL (-42) -#define PCRE2_ERROR_JIT_BADOPTION (-43) -#define PCRE2_ERROR_JIT_STACKLIMIT (-44) -#define PCRE2_ERROR_MATCHLIMIT (-45) -#define PCRE2_ERROR_NOMEMORY (-46) -#define PCRE2_ERROR_NOSUBSTRING (-47) -#define PCRE2_ERROR_NULL (-48) -#define PCRE2_ERROR_RECURSELOOP (-49) -#define PCRE2_ERROR_RECURSIONLIMIT (-50) -#define PCRE2_ERROR_UNSET (-51) +#define PCRE2_ERROR_BADDATA (-29) +#define PCRE2_ERROR_BADLENGTH (-30) +#define PCRE2_ERROR_BADMAGIC (-31) +#define PCRE2_ERROR_BADMODE (-32) +#define PCRE2_ERROR_BADOFFSET (-33) +#define PCRE2_ERROR_BADOPTION (-34) +#define PCRE2_ERROR_BADUTFOFFSET (-35) +#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */ +#define PCRE2_ERROR_DFA_BADRESTART (-37) +#define PCRE2_ERROR_DFA_RECURSE (-38) +#define PCRE2_ERROR_DFA_UCOND (-39) +#define PCRE2_ERROR_DFA_UITEM (-40) +#define PCRE2_ERROR_DFA_WSSIZE (-41) +#define PCRE2_ERROR_INTERNAL (-42) +#define PCRE2_ERROR_JIT_BADOPTION (-43) +#define PCRE2_ERROR_JIT_STACKLIMIT (-44) +#define PCRE2_ERROR_MATCHLIMIT (-45) +#define PCRE2_ERROR_NOMEMORY (-46) +#define PCRE2_ERROR_NOSUBSTRING (-47) +#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48) +#define PCRE2_ERROR_NULL (-49) +#define PCRE2_ERROR_RECURSELOOP (-50) +#define PCRE2_ERROR_RECURSIONLIMIT (-51) +#define PCRE2_ERROR_UNSET (-52) /* Request types for pcre2_pattern_info() */ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 78182b2..c0dbe8a 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -6068,7 +6068,7 @@ for (;; ptr++) /* Process nested bracketed regex. First check for parentheses nested too deeply. */ - if ((cb->parens_depth += 1) > PARENS_NEST_LIMIT) + if ((cb->parens_depth += 1) > (int)(cb->cx->parens_nest_limit)) { *errorcodeptr = ERR19; goto FAILED; @@ -7786,7 +7786,7 @@ if (cb.hwm > cb.start_workspace) NULL to indicate that forward references have been filled in. */ if (cb.workspace_size > COMPILE_WORK_SIZE) - ccontext->memctl.free((void *)cb.start_workspace, + ccontext->memctl.free((void *)cb.start_workspace, ccontext->memctl.memory_data); cb.start_workspace = NULL; diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 192c806..d1a253d 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -221,9 +221,10 @@ static const char match_error_texts[] = "match limit exceeded\0" "no more memory\0" "unknown or unset substring\0" + "non-unique substring name\0" "NULL argument passed\0" - "nested recursion at the same subject position\0" /* 50 */ + "nested recursion at the same subject position\0" "recursion limit exceeded\0" "requested value is not set\0" ; diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c index d440d9e..54dba24 100644 --- a/src/pcre2_jit_test.c +++ b/src/pcre2_jit_test.c @@ -1127,7 +1127,7 @@ static int regression_tests(void) #elif defined SUPPORT_PCRE2_16 pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info); #elif defined SUPPORT_PCRE2_32 - pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info)); + pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info); #endif printf("Running JIT regression tests\n"); diff --git a/src/pcre2_substring.c b/src/pcre2_substring.c index 0b42d30..3faecd0 100644 --- a/src/pcre2_substring.c +++ b/src/pcre2_substring.c @@ -409,7 +409,8 @@ Arguments: firstptr where to put the pointer to the first entry lastptr where to put the pointer to the last entry -Returns: if firstptr and lastptr are NULL, a group number; +Returns: if firstptr and lastptr are NULL, a group number for a + unique substring, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise, the length of each entry, or a negative number (PCRE2_ERROR_NOSUBSTRING) if not found */ @@ -433,7 +434,6 @@ while (top > bot) PCRE2_SPTR first; PCRE2_SPTR last; PCRE2_SPTR lastentry; - if (firstptr == NULL) return GET2(entry, 0); lastentry = nametable + entrysize * (code->name_count - 1); first = last = entry; while (first > nametable) @@ -446,6 +446,8 @@ while (top > bot) if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break; last += entrysize; } + if (firstptr == NULL) + return (first == last)? (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING; *firstptr = first; *lastptr = last; return entrysize; diff --git a/src/pcre2test.c b/src/pcre2test.c index 12f0d39..e3f55b8 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -319,29 +319,30 @@ enum { MOD_CTC, /* Applies to a compile context */ /* Control bits. Some apply to compiling, some to matching, but some can be set either on a pattern or a data line, so they must all be distinct. */ -#define CTL_AFTERTEXT 0x00000001u -#define CTL_ALLAFTERTEXT 0x00000002u -#define CTL_ALLCAPTURES 0x00000004u -#define CTL_ALLUSEDTEXT 0x00000008u -#define CTL_ALTGLOBAL 0x00000010u -#define CTL_BINCODE 0x00000020u -#define CTL_CALLOUT_CAPTURE 0x00000040u -#define CTL_CALLOUT_NONE 0x00000080u -#define CTL_DFA 0x00000100u -#define CTL_FINDLIMITS 0x00000200u -#define CTL_FULLBINCODE 0x00000400u -#define CTL_GETALL 0x00000800u -#define CTL_GLOBAL 0x00001000u -#define CTL_HEXPAT 0x00002000u -#define CTL_INFO 0x00004000u -#define CTL_JITVERIFY 0x00008000u -#define CTL_MARK 0x00010000u -#define CTL_MEMORY 0x00020000u -#define CTL_PATLEN 0x00040000u -#define CTL_POSIX 0x00080000u +#define CTL_AFTERTEXT 0x00000001u +#define CTL_ALLAFTERTEXT 0x00000002u +#define CTL_ALLCAPTURES 0x00000004u +#define CTL_ALLUSEDTEXT 0x00000008u +#define CTL_ALTGLOBAL 0x00000010u +#define CTL_BINCODE 0x00000020u +#define CTL_CALLOUT_CAPTURE 0x00000040u +#define CTL_CALLOUT_NONE 0x00000080u +#define CTL_DFA 0x00000100u +#define CTL_FINDLIMITS 0x00000200u +#define CTL_FULLBINCODE 0x00000400u +#define CTL_GETALL 0x00000800u +#define CTL_GLOBAL 0x00001000u +#define CTL_HEXPAT 0x00002000u +#define CTL_INFO 0x00004000u +#define CTL_JITVERIFY 0x00008000u +#define CTL_MARK 0x00010000u +#define CTL_MEMORY 0x00020000u +#define CTL_PATLEN 0x00040000u +#define CTL_POSIX 0x00080000u +#define CTL_STARTCHAR 0x00100000u -#define CTL_BSR_SET 0x00100000u /* This is informational */ -#define CTL_NL_SET 0x00200000u /* This is informational */ +#define CTL_BSR_SET 0x80000000u /* This is informational */ +#define CTL_NL_SET 0x40000000u /* This is informational */ #define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */ #define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE) /* For testing */ @@ -358,7 +359,8 @@ data line. */ CTL_GLOBAL|\ CTL_JITVERIFY|\ CTL_MARK|\ - CTL_MEMORY) + CTL_MEMORY|\ + CTL_STARTCHAR) typedef struct patctl { /* Structure for pattern modifiers. */ uint32_t options; /* Must be in same position as datctl */ @@ -476,6 +478,7 @@ static modstruct modlist[] = { { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) }, { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, + { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, @@ -499,6 +502,10 @@ static modstruct modlist[] = { #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) +/* Controls that are mutually exclusive. */ + +#define EXCLUSIVE_DAT_CONTROLS (CTL_ALLUSEDTEXT|CTL_STARTCHAR) + /* Table of single-character abbreviated modifiers. The index field is initialized to -1, but the first time the modifier is encountered, it is filled in with the index of the full entry in modlist, to save repeated searching when @@ -654,7 +661,7 @@ static uint8_t *dbuffer = NULL; #ifdef SUPPORT_PCRE2_8 static pcre2_code_8 *compiled_code8; -static pcre2_general_context_8 *general_context8; +static pcre2_general_context_8 *general_context8, *general_context_copy8; static pcre2_compile_context_8 *pat_context8, *default_pat_context8; static pcre2_match_context_8 *dat_context8, *default_dat_context8; static pcre2_match_data_8 *match_data8; @@ -662,7 +669,7 @@ static pcre2_match_data_8 *match_data8; #ifdef SUPPORT_PCRE2_16 static pcre2_code_16 *compiled_code16; -static pcre2_general_context_16 *general_context16; +static pcre2_general_context_16 *general_context16, *general_context_copy16; static pcre2_compile_context_16 *pat_context16, *default_pat_context16; static pcre2_match_context_16 *dat_context16, *default_dat_context16; static pcre2_match_data_16 *match_data16; @@ -672,7 +679,7 @@ static uint16_t *pbuffer16 = NULL; #ifdef SUPPORT_PCRE2_32 static pcre2_code_32 *compiled_code32; -static pcre2_general_context_32 *general_context32; +static pcre2_general_context_32 *general_context32, *general_context_copy32; static pcre2_compile_context_32 *pat_context32, *default_pat_context32; static pcre2_match_context_32 *dat_context32, *default_dat_context32; static pcre2_match_data_32 *match_data32; @@ -771,11 +778,32 @@ are supported. */ else \ r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_get_ovector_count_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_get_ovector_count_16(G(b,16)); \ + else \ + a = pcre2_get_ovector_count_32(G(b,32)) + +#define PCRE2_GET_STARTCHAR(a,b) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_get_startchar_8(G(b,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_get_startchar_16(G(b,16)); \ + else \ + a = pcre2_get_startchar_32(G(b,32)) + #define PCRE2_JIT_COMPILE(a,b) \ if (test_mode == PCRE8_MODE) pcre2_jit_compile_8(G(a,8),b); \ else if (test_mode == PCRE16_MODE) pcre2_jit_compile_16(G(a,16),b); \ else pcre2_jit_compile_32(G(a,32),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ + if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \ + else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \ + else pcre2_jit_free_unused_memory_32(G(a,32)) + #define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \ if (test_mode == PCRE8_MODE) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d); \ @@ -821,6 +849,14 @@ are supported. */ else \ G(a,32) = pcre2_match_data_create_32(b,c) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \ + else \ + G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) + #define PCRE2_MATCH_DATA_FREE(a) \ if (test_mode == PCRE8_MODE) \ pcre2_match_data_free_8(G(a,8)); \ @@ -877,6 +913,14 @@ are supported. */ else \ pcre2_set_match_limit_32(G(a,32),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_parens_nest_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_parens_nest_limit_16(G(a,16),b); \ + else \ + pcre2_set_parens_nest_limit_32(G(a,32),b) + #define PCRE2_SET_RECURSION_LIMIT(a,b) \ if (test_mode == PCRE8_MODE) \ pcre2_set_recursion_limit_8(G(a,8),b); \ @@ -923,6 +967,22 @@ are supported. */ else \ a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e) +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \ + else \ + a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d) + +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \ + else \ + a = pcre2_substring_length_bynumber_32(G(b,32),c,d) + #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ if (test_mode == PCRE8_MODE) \ a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \ @@ -939,6 +999,14 @@ are supported. */ else \ pcre2_substring_list_free_32((PCRE2_SPTR32 *)a) +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + if (test_mode == PCRE8_MODE) \ + a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \ + else if (test_mode == PCRE16_MODE) \ + a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \ + else \ + a = pcre2_substring_number_from_name_32(G(b,32),G(c,32)) + #define PTR(x) ( \ (test_mode == PCRE8_MODE)? (void *)G(x,8) : \ (test_mode == PCRE16_MODE)? (void *)G(x,16) : \ @@ -1082,12 +1150,30 @@ the three different cases. */ else \ r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \ + else \ + a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO)) + +#define PCRE2_GET_STARTCHAR(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \ + else \ + a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO)) + #define PCRE2_JIT_COMPILE(a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \ else \ G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \ + else \ + G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO)) + #define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_alloc_,BITONE)(b,c,d); \ @@ -1126,6 +1212,12 @@ the three different cases. */ else \ G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \ + else \ + G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c) + #define PCRE2_MATCH_DATA_FREE(a) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \ @@ -1170,6 +1262,12 @@ the three different cases. */ else \ G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b) + #define PCRE2_SET_RECURSION_LIMIT(a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(pcre2_set_recursion_limit_,BITONE)(G(a,BITONE),b); \ @@ -1213,6 +1311,18 @@ the three different cases. */ a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\ (G(PCRE2_UCHAR,BITTWO) **)d,e) +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \ + else \ + a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d) + +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \ + else \ + a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d) + #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \ @@ -1227,6 +1337,12 @@ the three different cases. */ else \ G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a) +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \ + else \ + a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO)) + #define PTR(x) ( \ (test_mode == G(G(PCRE,BITONE),_MODE))? (void *)G(x,BITONE) : \ (void *)G(x,BITTWO)) @@ -1298,7 +1414,10 @@ the three different cases. */ a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8),i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8)) +#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8)) #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_8(G(a,8),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8)) #define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1308,6 +1427,8 @@ the three different cases. */ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8)) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c) #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8)) #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d) #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a) @@ -1317,6 +1438,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \ pcre2_set_compile_recursion_guard_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e) @@ -1327,10 +1449,16 @@ the three different cases. */ a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e) #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e) +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d) +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + a = pcre2_substring_length_bynumber_8(G(b,8),c,d) #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d) #define PCRE2_SUBSTRING_LIST_FREE(a) \ pcre2_substring_list_free_8((PCRE2_SPTR8 *)a) +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); #define PTR(x) (void *)G(x,8) #define SETFLD(x,y,z) G(x,8)->y = z #define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z @@ -1362,7 +1490,10 @@ the three different cases. */ a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16),i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16)) +#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16)) #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16)) #define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_16(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1372,6 +1503,8 @@ the three different cases. */ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16)) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c) #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16)) #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d) #define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a) @@ -1381,6 +1514,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \ pcre2_set_compile_recursion_guard_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e) @@ -1391,10 +1525,16 @@ the three different cases. */ a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e) #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e) +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d) +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + a = pcre2_substring_length_bynumber_16(G(b,16),c,d) #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d) #define PCRE2_SUBSTRING_LIST_FREE(a) \ pcre2_substring_list_free_16((PCRE2_SPTR16 *)a) +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); #define PTR(x) (void *)G(x,16) #define SETFLD(x,y,z) G(x,16)->y = z #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z @@ -1426,7 +1566,10 @@ the three different cases. */ a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32),i,j) #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size)) +#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32)) +#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32)) #define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b) +#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32)) #define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \ a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_32(b,c,d); #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ @@ -1436,6 +1579,8 @@ the three different cases. */ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32)) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c) +#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ + G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32)) #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d) #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a) @@ -1445,6 +1590,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \ pcre2_set_compile_recursion_guard_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) +#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e) @@ -1455,10 +1601,16 @@ the three different cases. */ a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e) #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e) +#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ + a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d) +#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ + a = pcre2_substring_length_bynumber_32(G(b,32),c,d) #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d) #define PCRE2_SUBSTRING_LIST_FREE(a) \ pcre2_substring_list_free_32((PCRE2_SPTR32 *)a) +#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ + a = pcre2_substring_number_from_name_32(G(b,32),G(c,32)); #define PTR(x) (void *)G(x,32) #define SETFLD(x,y,z) G(x,32)->y = z #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z @@ -2978,43 +3130,48 @@ fprintf(outfile, "%s %s", *msg, s); -#ifdef SUPPORT_PCRE2_8 /************************************************* -* Show compile controls * +* Show control bits * *************************************************/ -/* Called for unsupported POSIX modifiers, and therefore needed only when the -8-bit library is supported. +/* Called for mutually exclusive controls and for unsupported POSIX controls. +Because the bits are unique, this can be used for both pattern and data control +words. Arguments: controls control bits before text to print before - after text to print after Returns: nothing */ static void -show_compile_controls(uint32_t controls, const char *before, const char *after) +show_controls(uint32_t controls, const char *before) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "", + ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "", ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "", ((controls & CTL_BINCODE) != 0)? " bincode" : "", + ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "", + ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "", + ((controls & CTL_DFA) != 0)? " dfa" : "", + ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "", ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "", + ((controls & CTL_GETALL) != 0)? " getall" : "", ((controls & CTL_GLOBAL) != 0)? " global" : "", ((controls & CTL_HEXPAT) != 0)? " hex" : "", ((controls & CTL_INFO) != 0)? " info" : "", ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", ((controls & CTL_MARK) != 0)? " mark" : "", + ((controls & CTL_MEMORY) != 0)? " memory" : "", ((controls & CTL_PATLEN) != 0)? " use_length" : "", ((controls & CTL_POSIX) != 0)? " posix" : "", - after); + ((controls & CTL_STARTCHAR) != 0)? " startchar" : ""); } -#endif /* SUPPORT_PCRE2_8 */ @@ -3064,34 +3221,6 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", -#ifdef SUPPORT_PCRE2_8 -/************************************************* -* Show match controls * -*************************************************/ - -/* Called for unsupported POSIX modifiers. */ - -static void -show_match_controls(uint32_t controls) -{ -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s", - ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", - ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", - ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "", - ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "", - ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "", - ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "", - ((controls & CTL_DFA) != 0)? " dfa" : "", - ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "", - ((controls & CTL_GETALL) != 0)? " getall" : "", - ((controls & CTL_GLOBAL) != 0)? " global" : "", - ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", - ((controls & CTL_MARK) != 0)? " mark" : "", - ((controls & CTL_MEMORY) != 0)? " memory" : ""); -} -#endif /* SUPPORT_PCRE2_8 */ - - #ifdef SUPPORT_PCRE2_8 /************************************************* * Show match options * @@ -3635,8 +3764,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) } if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0) { - show_compile_controls( - pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, msg, ""); + show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, msg); msg = ""; } @@ -4345,6 +4473,16 @@ c = code_unit_size * ((pat_patctl.control & CTL_POSIX) != 0)? 1:0; if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl)) return PR_OK; +/* Check for mutually exclusive modifiers. */ + +c = dat_datctl.control & EXCLUSIVE_DAT_CONTROLS; +if (c - (c & -c) != 0) + { + show_controls(c, "** Not allowed together:"); + fprintf(outfile, "\n"); + return PR_OK; + } + /* Now run the pattern match: len contains the byte length, ulen contains the code unit length, and pp points to the subject string. POSIX matching is only possible in 8-bit mode, and it does not support timing or other fancy features. @@ -4375,8 +4513,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) } if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0) { - fprintf(outfile, "%s", msg); - show_match_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS); + show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, msg); msg = ""; } @@ -4445,11 +4582,6 @@ if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT && dat_datctl.control &= ~CTL_ALLUSEDTEXT; } -/* As pcre2_match_data_create() imposes a minimum of 1 on the ovector count, we -must do so too. */ - -if (dat_datctl.oveccount < 1) dat_datctl.oveccount = 1; - /* Enable display of malloc/free if wanted. */ show_memory = (dat_datctl.control & CTL_MEMORY) != 0; @@ -4485,9 +4617,16 @@ if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL) PCRE2_JIT_STACK_ASSIGN(compiled_code, jit_callback, NULL); } -/* Adjust match_data according to size of offsets required. */ +/* Adjust match_data according to size of offsets required. A size of zero +causes a new match data block to be obtained that exactly fits the pattern. */ -if (dat_datctl.oveccount <= max_oveccount) +if (dat_datctl.oveccount == 0) + { + PCRE2_MATCH_DATA_FREE(match_data); + PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL); + PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data); + } +else if (dat_datctl.oveccount <= max_oveccount) { SETFLD(match_data, oveccount, dat_datctl.oveccount); } @@ -4619,16 +4758,18 @@ for (gmatched = 0;; gmatched++) if (capcount >= 0) { int i; + uint32_t oveccount; uint8_t *nptr; /* This is a check against a lunatic return value. */ - if (capcount > (int)dat_datctl.oveccount) + PCRE2_GET_OVECTOR_COUNT(oveccount, match_data); + if (capcount > (int)oveccount) { fprintf(outfile, "** PCRE2 error: returned count %d is too big for ovector count %d\n", - capcount, dat_datctl.oveccount); - capcount = dat_datctl.oveccount; + capcount, oveccount); + capcount = oveccount; if ((dat_datctl.control & CTL_ANYGLOB) != 0) { fprintf(outfile, "** Global loop abandoned\n"); @@ -4638,9 +4779,8 @@ for (gmatched = 0;; gmatched++) /* If this is not the first time round a global loop, check that the returned string has changed. If not, there is a bug somewhere and we must - break the loop because it will go on for ever. We know that for a global - match there must be at least two elements in the ovector. This is checked - above. */ + break the loop because it will go on for ever. We know that there are + always at least two elements in the ovector. */ if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) { @@ -4660,7 +4800,7 @@ for (gmatched = 0;; gmatched++) if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0) return PR_SKIP; capcount = maxcapcount + 1; /* Allow for full match */ - if (capcount > (int)dat_datctl.oveccount) capcount = dat_datctl.oveccount; + if (capcount > (int)oveccount) capcount = oveccount; } /* Output the captured substrings. Note that, for the matched string, @@ -4710,26 +4850,46 @@ for (gmatched = 0;; gmatched++) if (showallused) { + PCRE2_SIZE j; PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile); PCHARS(lmiddle, pp, start, end - start, utf, outfile); PCHARS(lright, pp, end, rightchar - end, utf, outfile); - } - else - { - PCHARSV(pp, start, end - start, utf, outfile); - } - - if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) - fprintf(outfile, " (JIT)"); - - if (showallused) - { - PCRE2_SIZE j; + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); fprintf(outfile, "\n "); for (j = 0; j < lleft; j++) fprintf(outfile, "<"); for (j = 0; j < lmiddle; j++) fprintf(outfile, " "); for (j = 0; j < lright; j++) fprintf(outfile, ">"); } + + /* When a pattern contains \K, the start of match position may be + different to the start of the matched string. When this is the case, + show it when requested. */ + + else if ((dat_datctl.control & CTL_STARTCHAR) != 0) + { + PCRE2_SIZE startchar; + PCRE2_GET_STARTCHAR(startchar, match_data); + PCHARS(lleft, pp, startchar, start - startchar, utf, outfile); + PCHARSV(pp, start, end - start, utf, outfile); + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); + if (startchar != start) + { + PCRE2_SIZE j; + fprintf(outfile, "\n "); + for (j = 0; j < lleft; j++) fprintf(outfile, "^"); + } + } + + /* Otherwise, just show the matched string. */ + + else + { + PCHARSV(pp, start, end - start, utf, outfile); + if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) + fprintf(outfile, " (JIT)"); + } } /* Not the main matched string. Just show it unadorned. */ @@ -4768,20 +4928,33 @@ for (gmatched = 0;; gmatched++) for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++) { int rc; - PCRE2_SIZE length; + PCRE2_SIZE length, length2; uint32_t copybuffer[256]; uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]); length = sizeof(copybuffer)/code_unit_size; PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length); if (rc < 0) { - fprintf(outfile, "copy substring %d failed (%d): ", n, rc); + fprintf(outfile, "Copy substring %d failed (%d): ", n, rc); PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); fprintf(outfile, "\n"); } else { + PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2); + if (rc < 0) + { + fprintf(outfile, "Get substring %d length failed (%d): ", n, rc); + PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); + PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); + fprintf(outfile, "\n"); + } + else if (length2 != length) + { + fprintf(outfile, "Mismatched substring lengths: %ld %ld\n", + length, length2); + } fprintf(outfile, "%2dC ", n); PCHARSV(copybuffer, 0, length, utf, outfile); fprintf(outfile, " (%lu)\n", (unsigned long)length); @@ -4794,7 +4967,8 @@ for (gmatched = 0;; gmatched++) for (;;) { int rc; - PCRE2_SIZE length; + int groupnumber; + PCRE2_SIZE length, length2; uint32_t copybuffer[256]; int namelen = strlen((const char *)nptr); #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 @@ -4812,20 +4986,39 @@ for (gmatched = 0;; gmatched++) if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl); #endif + PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); + if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) + fprintf(outfile, "Number not found for group '%s'\n", nptr); + length = sizeof(copybuffer)/code_unit_size; PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length); if (rc < 0) { - fprintf(outfile, "copy substring '%s' failed (%d): ", nptr, rc); + fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc); PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); fprintf(outfile, "\n"); } else { + PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2); + if (rc < 0) + { + fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc); + PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); + PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); + fprintf(outfile, "\n"); + } + else if (length2 != length) + { + fprintf(outfile, "Mismatched substring lengths: %ld %ld\n", + length, length2); + } fprintf(outfile, " C "); PCHARSV(copybuffer, 0, length, utf, outfile); - fprintf(outfile, " (%lu) %s\n", (unsigned long)length, nptr); + fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr); + if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); + else fprintf(outfile, " (non-unique)\n"); } nptr += namelen + 1; } @@ -4841,7 +5034,7 @@ for (gmatched = 0;; gmatched++) PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length); if (rc < 0) { - fprintf(outfile, "get substring %d failed (%d): ", n, rc); + fprintf(outfile, "Get substring %d failed (%d): ", n, rc); PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); fprintf(outfile, "\n"); @@ -4863,6 +5056,7 @@ for (gmatched = 0;; gmatched++) PCRE2_SIZE length; void *gotbuffer; int rc; + int groupnumber; int namelen = strlen((const char *)nptr); #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 PCRE2_SIZE cnl = namelen; @@ -4879,10 +5073,14 @@ for (gmatched = 0;; gmatched++) if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl); #endif + PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); + if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) + fprintf(outfile, "Number not found for group '%s'\n", nptr); + PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length); if (rc < 0) { - fprintf(outfile, "get substring '%s' failed (%d): ", nptr, rc); + fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc); PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); fprintf(outfile, "\n"); @@ -4891,7 +5089,9 @@ for (gmatched = 0;; gmatched++) { fprintf(outfile, " G "); PCHARSV(gotbuffer, 0, length, utf, outfile); - fprintf(outfile, " (%lu) %s\n", (unsigned long)length, nptr); + fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr); + if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); + else fprintf(outfile, " (non-unique)\n"); PCRE2_SUBSTRING_FREE(gotbuffer); } nptr += namelen + 1; @@ -5599,61 +5799,71 @@ while (argc > 1 && argv[op][0] == '-') } /* Initialize things that cannot be done until we know which test mode we are -running in. */ +running in. When HEAP_MATCH_RECURSE is undefined, calling pcre2_set_recursion_ +memory_management() is a no-op, but we call it in order to exercise it. Also +exercise the general context copying function, which is not otherwise used. */ code_unit_size = test_mode/8; max_oveccount = DEFAULT_OVECCOUNT; +/* Use macros to save a lot of duplication. */ + +#define CREATECONTEXTS \ + G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \ + G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \ + G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \ + G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \ + G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \ + G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \ + G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS)) + +#ifdef HEAP_MATCH_RECURSE +#define SETRECURSEMEMMAN \ + (void)G(pcre2_set_recursion_memory_management_,BITS) \ + (G(default_dat_context,BITS), \ + &my_stack_malloc, &my_stack_free, NULL) +#else +#define SETRECURSEMEMMAN \ + (void)G(pcre2_set_recursion_memory_management_,BITS)(NULL, NULL, NULL, NULL) +#endif + +/* Call the appropriate functions for the current mode. */ + #ifdef SUPPORT_PCRE2_8 +#undef BITS +#define BITS 8 if (test_mode == PCRE8_MODE) { - general_context8 = pcre2_general_context_create_8(&my_malloc, &my_free, NULL); - default_pat_context8 = pcre2_compile_context_create_8(general_context8); - pat_context8 = pcre2_compile_context_create_8(general_context8); - default_dat_context8 = pcre2_match_context_create_8(general_context8); - dat_context8 = pcre2_match_context_create_8(general_context8); - match_data8 = pcre2_match_data_create_8(max_oveccount, general_context8); -#ifdef HEAP_MATCH_RECURSE - (void)pcre2_set_recursion_memory_management_8(default_dat_context8, - &my_stack_malloc, &my_stack_free, NULL); -#endif + CREATECONTEXTS; + SETRECURSEMEMMAN; } #endif #ifdef SUPPORT_PCRE2_16 +#undef BITS +#define BITS 16 if (test_mode == PCRE16_MODE) { - general_context16 = pcre2_general_context_create_16(&my_malloc, &my_free, - NULL); - default_pat_context16 = pcre2_compile_context_create_16(general_context16); - pat_context16 = pcre2_compile_context_create_16(general_context16); - default_dat_context16 = pcre2_match_context_create_16(general_context16); - dat_context16 = pcre2_match_context_create_16(general_context16); - match_data16 = pcre2_match_data_create_16(max_oveccount, general_context16); -#ifdef HEAP_MATCH_RECURSE - (void)pcre2_set_recursion_memory_management_16(default_dat_context16, - &my_stack_malloc, &my_stack_free, NULL); -#endif + CREATECONTEXTS; + SETRECURSEMEMMAN; } #endif #ifdef SUPPORT_PCRE2_32 +#undef BITS +#define BITS 32 if (test_mode == PCRE32_MODE) { - general_context32 = pcre2_general_context_create_32(&my_malloc, &my_free, - NULL); - default_pat_context32 = pcre2_compile_context_create_32(general_context32); - pat_context32 = pcre2_compile_context_create_32(general_context32); - default_dat_context32 = pcre2_match_context_create_32(general_context32); - dat_context32 = pcre2_match_context_create_32(general_context32); - match_data32 = pcre2_match_data_create_32(max_oveccount, general_context32); -#ifdef HEAP_MATCH_RECURSE - (void)pcre2_set_recursion_memory_management_32(default_dat_context32, - &my_stack_malloc, &my_stack_free, NULL); -#endif + CREATECONTEXTS; + SETRECURSEMEMMAN; } #endif +/* Set a default parentheses nest limit that is large enough to run the +standard tests (this also exercises the function). */ + +PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, 220); + /* Handle command line modifier settings, sending any error messages to stderr. We need to know the mode before modifying the context, and it is tidier to do them all in the same way. */ @@ -5818,36 +6028,39 @@ free((void *)locale_tables); PCRE2_MATCH_DATA_FREE(match_data); SUB1(pcre2_code_free, compiled_code); +PCRE2_JIT_FREE_UNUSED_MEMORY(general_context); if (jit_stack != NULL) { PCRE2_JIT_STACK_FREE(jit_stack); } +#define FREECONTEXTS \ + G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \ + G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \ + G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \ + G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \ + G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \ + G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)) + #ifdef SUPPORT_PCRE2_8 +#undef BITS +#define BITS 8 regfree(&preg); -pcre2_general_context_free_8(general_context8); -pcre2_compile_context_free_8(pat_context8); -pcre2_compile_context_free_8(default_pat_context8); -pcre2_match_context_free_8(dat_context8); -pcre2_match_context_free_8(default_dat_context8); +FREECONTEXTS; #endif #ifdef SUPPORT_PCRE2_16 +#undef BITS +#define BITS 16 free(pbuffer16); -pcre2_general_context_free_16(general_context16); -pcre2_compile_context_free_16(pat_context16); -pcre2_compile_context_free_16(default_pat_context16); -pcre2_match_context_free_16(dat_context16); -pcre2_match_context_free_16(default_dat_context16); +FREECONTEXTS; #endif #ifdef SUPPORT_PCRE2_32 +#undef BITS +#define BITS 32 free(pbuffer32); -pcre2_general_context_free_32(general_context32); -pcre2_compile_context_free_32(pat_context32); -pcre2_compile_context_free_32(default_pat_context32); -pcre2_match_context_free_32(dat_context32); -pcre2_match_context_free_32(default_dat_context32); +FREECONTEXTS; #endif #if defined(__VMS) diff --git a/testdata/grepoutput b/testdata/grepoutput index f8b029f..97af187 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -392,7 +392,7 @@ pcre2grep: pcre2_match() gave error -45 while matching this text: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded. +pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded. pcre2grep: Check your regex for nested unlimited loops. ---------------------------- Test 38 ------------------------------ This line contains a binary zero here >< for testing. @@ -516,17 +516,17 @@ This is a file of miscellaneous text that is used as test data for checking that the pcregrep command is working correctly. The file must be more than 24K long so that it needs more than a single read -pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded. +pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded. pcre2grep: Check your regex for nested unlimited loops. RC=1 ---------------------------- Test 63 ----------------------------- -pcre2grep: pcre2_match() gave error -50 while matching text that starts: +pcre2grep: pcre2_match() gave error -51 while matching text that starts: This is a file of miscellaneous text that is used as test data for checking that the pcregrep command is working correctly. The file must be more than 24K long so that it needs more than a single read -pcre2grep: Error -44, -45 or -50 means that a resource limit was exceeded. +pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded. pcre2grep: Check your regex for nested unlimited loops. RC=1 ---------------------------- Test 64 ------------------------------ diff --git a/testdata/testinput17 b/testdata/testinput17 index 7240158..01f8d9d 100644 --- a/testdata/testinput17 +++ b/testdata/testinput17 @@ -5,6 +5,18 @@ #forbid_utf #pattern posix +# Test invalid options + +/abc/auto_callout + +/abc/ + abc\=find_limits + +/abc/ + abc\=partial_hard + +# Real tests + /abc/ abc *** Failers diff --git a/testdata/testinput2 b/testdata/testinput2 index 548224f..c48999f 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2618,7 +2618,7 @@ a random value. /Ix YXYYY\=ps YXYYYY\=ps -/\++\KZ|\d+X|9+Y/ +/\++\KZ|\d+X|9+Y/startchar ++++123999\=ps ++++123999Y\=ps ++++Z1234\=ps @@ -2655,7 +2655,7 @@ a random value. /Ix abc\=ps abc\=ph -/abc\K123/ +/abc\K123/startchar xyzabc123pqr xyzabc12\=ps xyzabc12\=ph @@ -2676,7 +2676,7 @@ a random value. /Ix /(ab)(x(y)z(cd(*ACCEPT)))pq/B -/abc\K/aftertext +/abc\K/aftertext,startchar abcdef abcdef\=notempty_atstart xyzabcdef\=notempty_atstart @@ -2684,7 +2684,7 @@ a random value. /Ix abcdef\=notempty xyzabcdef\=notempty -/^(?:(?=abc)|abc\K)/aftertext +/^(?:(?=abc)|abc\K)/aftertext,startchar abcdef abcdef\=notempty_atstart ** Failers @@ -2923,7 +2923,7 @@ a random value. /Ix believe this to be a Perl bug. --/ /(?>a\Kb)z|(ab)/ - ab + ab\=startchar /(?P(?P0|)|(?P>L2)(?P>L1))/ @@ -3643,7 +3643,7 @@ a random value. /Ix xxxx123a\=ph xxxx123a\=ps -/123\Kabc/ +/123\Kabc/startchar xxxx123a\=ph xxxx123a\=ps @@ -3896,7 +3896,7 @@ a random value. /Ix /[a[:<:]] should give error/ /(?=ab\K)/aftertext - abcd + abcd\=startchar /abcd/newline=lf,firstline xx\nxabcd @@ -4006,4 +4006,6 @@ a random value. /Ix /\k*(?aa)(?bb)/match_unset_backref,dupnames aabb +/(((((a)))))/parens_nest_limit=2 + # End of testinput2 diff --git a/testdata/testinput5 b/testdata/testinput5 index a05f1d7..51767a8 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1623,6 +1623,9 @@ scat /\X?abc/utf,no_start_optimize -\xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + +/\x{100}\x{200}\K\x{300}/utf,startchar + \x{100}\x{200}\x{300} # End of testinput5 diff --git a/testdata/testoutput14 b/testdata/testoutput14 index b3fc8d3..d248879 100644 --- a/testdata/testoutput14 +++ b/testdata/testoutput14 @@ -118,7 +118,7 @@ Failed: error -45: match limit exceeded /(a+)*zz/ aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -50: recursion limit exceeded +Failed: error -51: recursion limit exceeded /(*LIMIT_MATCH=3000)(a+)*zz/I Capturing subpattern count = 1 @@ -158,9 +158,9 @@ Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaz -Failed: error -50: recursion limit exceeded +Failed: error -51: recursion limit exceeded aaaaaaaaaaaaaz\=recursion_limit=1000 -Failed: error -50: recursion limit exceeded +Failed: error -51: recursion limit exceeded /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I Capturing subpattern count = 1 @@ -180,21 +180,21 @@ Subject length lower bound = 2 aaaaaaaaaaaaaz No match aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -50: recursion limit exceeded +Failed: error -51: recursion limit exceeded # These three have infinitely nested recursions. /((?2))((?1))/ abc -Failed: error -49: nested recursion at the same subject position +Failed: error -50: nested recursion at the same subject position /((?(R2)a+|(?1)b))/ aaaabcde -Failed: error -49: nested recursion at the same subject position +Failed: error -50: nested recursion at the same subject position /(?(R)a*(?1)|((?R))b)/ aaaabcde -Failed: error -49: nested recursion at the same subject position +Failed: error -50: nested recursion at the same subject position # The allusedtext modifier does not work with JIT, which does not maintain # the leftchar/rightchar data. diff --git a/testdata/testoutput17 b/testdata/testoutput17 index 1a3de59..9cfe3d9 100644 --- a/testdata/testoutput17 +++ b/testdata/testoutput17 @@ -5,6 +5,23 @@ #forbid_utf #pattern posix +# Test invalid options + +/abc/auto_callout +** Ignored with POSIX interface: auto_callout + +/abc/ + abc\=find_limits +** Ignored with POSIX interface: find_limits + 0: abc + +/abc/ + abc\=partial_hard +** Ignored with POSIX interface: partial_hard + 0: abc + +# Real tests + /abc/ abc 0: abc diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 177c810..1ed51f4 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -244,8 +244,10 @@ Subject length lower bound = 4 2: b 3: c abcb\=ovector=0 -Matched, but too many substrings 0: abcb + 1: a + 2: b + 3: c abcb\=ovector=1 Matched, but too many substrings 0: abcb @@ -273,8 +275,8 @@ Subject length lower bound = 3 0: abc 1: a abc\=ovector=0 -Matched, but too many substrings 0: abc + 1: a abc\=ovector=1 Matched, but too many substrings 0: abc @@ -287,8 +289,10 @@ Matched, but too many substrings 2: a 3: b aba\=ovector=0 -Matched, but too many substrings 0: aba + 1: + 2: a + 3: b aba\=ovector=1 Matched, but too many substrings 0: aba @@ -989,7 +993,7 @@ Subject length lower bound = 4 0: abcd 1: a 2: d -copy substring 5 failed (-47): unknown or unset substring +Copy substring 5 failed (-47): unknown or unset substring /(.{20})/I Capturing subpattern count = 1 @@ -1043,9 +1047,9 @@ Subject length lower bound = 4 2: 3: f 1G a (1) -get substring 2 failed (-47): unknown or unset substring +Get substring 2 failed (-47): unknown or unset substring 3G f (1) -get substring 4 failed (-47): unknown or unset substring +Get substring 4 failed (-47): unknown or unset substring 0L adef 1L a 2L @@ -1058,7 +1062,7 @@ get substring 4 failed (-47): unknown or unset substring 1G bc (2) 2G bc (2) 3G f (1) -get substring 4 failed (-47): unknown or unset substring +Get substring 4 failed (-47): unknown or unset substring 0L bcdef 1L bc 2L bc @@ -4347,18 +4351,19 @@ Subject length lower bound = 8 1: cd 2: gh 1C cd (2) - G gh (2) two + G gh (2) two (group 2) abcdefgh\=copy=one,copy=two 0: abcdefgh 1: cd 2: gh - C cd (2) one - C gh (2) two + C cd (2) one (group 1) + C gh (2) two (group 2) abcdefgh\=copy=three 0: abcdefgh 1: cd 2: gh -copy substring 'three' failed (-47): unknown or unset substring +Number not found for group 'three' +Copy substring 'three' failed (-47): unknown or unset substring /(?P)(?P)/IB ------------------------------------------------------------------ @@ -4406,12 +4411,12 @@ Subject length lower bound = 4 0: zzaa 1: zz 2: aa - C zz (2) Z + C zz (2) Z (group 1) zzaa\=copy=A 0: zzaa 1: zz 2: aa - C aa (2) A + C aa (2) A (group 2) /(?Peks)(?Peccs)/I Failed: error 143 at offset 15: two named subpatterns have the same name (PCRE2_DUPNAMES not set) @@ -5712,21 +5717,22 @@ Subject length lower bound = 2 0: a1 1: a1 2: a1 - C a1 (2) A + C a1 (2) A (non-unique) a2b\=copy=A 0: a2b 1: a2b 2: 3: a2 - C a2 (2) A + C a2 (2) A (non-unique) ** Failers No match a1b\=copy=Z,copy=A 0: a1 1: a1 2: a1 -copy substring 'Z' failed (-47): unknown or unset substring - C a1 (2) A +Number not found for group 'Z' +Copy substring 'Z' failed (-47): unknown or unset substring + C a1 (2) A (non-unique) /(?|(?)(?)(?)|(?)(?)(?))/I,dupnames Capturing subpattern count = 3 @@ -5750,7 +5756,7 @@ Subject length lower bound = 2 0: ab 1: a 2: b - C a (1) A + C a (1) A (non-unique) /^(?Pa)(?Pb)|cd/I,dupnames Capturing subpattern count = 2 @@ -5763,10 +5769,10 @@ Subject length lower bound = 2 0: ab 1: a 2: b - C a (1) A + C a (1) A (non-unique) cd\=copy=A 0: cd -copy substring 'A' failed (-47): unknown or unset substring +Copy substring 'A' failed (-47): unknown or unset substring /^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames Capturing subpattern count = 4 @@ -5783,7 +5789,7 @@ Subject length lower bound = 2 2: 3: ef 4: gh - C ef (2) A + C ef (2) A (non-unique) /^((?Pa1)|(?Pa2)b)/I,dupnames Capturing subpattern count = 3 @@ -5797,21 +5803,22 @@ Subject length lower bound = 2 0: a1 1: a1 2: a1 - G a1 (2) A + G a1 (2) A (non-unique) a2b\=get=A 0: a2b 1: a2b 2: 3: a2 - G a2 (2) A + G a2 (2) A (non-unique) ** Failers No match a1b\=get=Z,get=A 0: a1 1: a1 2: a1 -get substring 'Z' failed (-47): unknown or unset substring - G a1 (2) A +Number not found for group 'Z' +Get substring 'Z' failed (-47): unknown or unset substring + G a1 (2) A (non-unique) /^(?Pa)(?Pb)/I,dupnames Capturing subpattern count = 2 @@ -5825,7 +5832,7 @@ Subject length lower bound = 2 0: ab 1: a 2: b - G a (1) A + G a (1) A (non-unique) /^(?Pa)(?Pb)|cd/I,dupnames Capturing subpattern count = 2 @@ -5838,10 +5845,10 @@ Subject length lower bound = 2 0: ab 1: a 2: b - G a (1) A + G a (1) A (non-unique) cd\=get=A 0: cd -get substring 'A' failed (-47): unknown or unset substring +Get substring 'A' failed (-47): unknown or unset substring /^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames Capturing subpattern count = 4 @@ -5858,7 +5865,7 @@ Subject length lower bound = 2 2: 3: ef 4: gh - G ef (2) A + G ef (2) A (non-unique) /(?J)^((?Pa1)|(?Pa2)b)/I Capturing subpattern count = 3 @@ -5873,13 +5880,13 @@ Subject length lower bound = 2 0: a1 1: a1 2: a1 - C a1 (2) A + C a1 (2) A (non-unique) a2b\=copy=A 0: a2b 1: a2b 2: 3: a2 - C a2 (2) A + C a2 (2) A (non-unique) /^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I Failed: error 143 at offset 37: two named subpatterns have the same name (PCRE2_DUPNAMES not set) @@ -5910,9 +5917,9 @@ Subject length lower bound = 6 2: b 3: c 4: d - C a (1) A - C b (1) B - C d (1) C + C a (1) A (group 1) + C b (1) B (non-unique) + C d (1) C (group 4) /^(?Pa)?(?(A)a|b)/I Capturing subpattern count = 1 @@ -7037,8 +7044,8 @@ Subject length lower bound = 2 0: xy 1: x 2: y - C x (1) abc - C y (1) xyz + C x (1) abc (group 1) + C y (1) xyz (group 2) /(?x)(?'xyz'y)/I Capturing subpattern count = 2 @@ -7052,8 +7059,8 @@ Subject length lower bound = 2 0: xy 1: x 2: y - C x (1) abc - C y (1) xyz + C x (1) abc (group 1) + C y (1) xyz (group 2) /(?A)|(?B))/ Failed: error 165 at offset 15: different names for subpatterns of the same number are not allowed @@ -10078,7 +10091,7 @@ No match believe this to be a Perl bug. --/ /(?>a\Kb)z|(ab)/ - ab + ab\=startchar 0: ab 1: ab @@ -10883,8 +10896,10 @@ Subject length lower bound = 6 2: 3: baz bazfooX\=ovector=0 -Matched, but too many substrings 0: fooX + 1: foo + 2: + 3: bazfooX\=ovector=1 Matched, but too many substrings 0: fooX @@ -11888,7 +11903,7 @@ Partial match, mark=xx: 123a Partial match, mark=xx: 123a <<< -/123\Kabc/ +/123\Kabc/startchar xxxx123a\=ph Partial match: 123a xxxx123a\=ps @@ -13371,7 +13386,7 @@ No match Failed: error 130 at offset 4: unknown POSIX class name /(?=ab\K)/aftertext - abcd + abcd\=startchar Start of matched string is beyond its end - displaying from end to start. 0: ab 0+ abcd @@ -13589,4 +13604,7 @@ Subject length lower bound = 0 1: aa 2: bb +/(((((a)))))/parens_nest_limit=2 +Failed: error 119 at offset 3: parentheses are too deeply nested + # End of testinput2 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 438c4d3..70d34f4 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -3988,7 +3988,12 @@ Subject length lower bound = 1 0: sc /\X?abc/utf,no_start_optimize -\xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 0: A\x{300}abc +/\x{100}\x{200}\K\x{300}/utf,startchar + \x{100}\x{200}\x{300} + 0: \x{100}\x{200}\x{300} + ^^^^^^^^^^^^^^ + # End of testinput5 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index e5199ee..52db4f4 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7424,7 +7424,7 @@ No match /((?2))((?1))/ abc -Failed: error -49: nested recursion at the same subject position +Failed: error -50: nested recursion at the same subject position /(?(R)a+|(?R)b)/ aaaabcde @@ -7444,7 +7444,7 @@ Failed: error -39: backreference condition or recursion test not supported for D /(?(R)a*(?1)|((?R))b)/ aaaabcde -Failed: error -49: nested recursion at the same subject position +Failed: error -50: nested recursion at the same subject position /(a+)/no_auto_possess aaaa\=ovector=3