Tests 1 and 2 are converted (but without save/restore).

2014-07-24 16:32:38 +00:00 · 2014-07-24 16:32:38 +00:00 · 017b6a1624
parent 1701838220
commit 017b6a1624
12 changed files with 35118 additions and 217 deletions
--- a/995
+++ b/995
@ -0,0 +1,995 @@
+#! /bin/sh
+
+###############################################################################
+# Run the PCRE2 tests using the pcre2test program. The appropriate tests are
+# selected, depending on which build-time options were used.
+#
+# When JIT support is available, all appropriate tests are run with and without
+# JIT, unless "nojit" is given on the command line. There are also two tests
+# for JIT-specific features, one to be run when JIT support is available
+# (unless "nojit" is specified), and one when it is not.
+#
+# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
+# possible to select which to test by giving "-8", "-16" or "-32" on the
+# command line.
+#
+# As well as "nojit", "-8", "-16", and "-32", arguments for this script are
+# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
+# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
+# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
+# except test 10. Whatever order the arguments are in, the tests are always run
+# in numerical order.
+#
+# The special argument "3S" runs test 3, stopping if it fails. Test 3 is the
+# locale test, and failure usually means there's an issue with the locale
+# rather than a bug in PCRE2, so normally subsequent tests are run. "3S" is
+# useful when you want to debug or update the test.
+#
+# Inappropriate tests are automatically skipped (with a comment to say so): for
+# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
+# support is compiled, test 13 is skipped.
+#
+# Other arguments can be the words "valgrind" or "valgrind-log", or the word
+# "sim" followed by an argument to run cross-compiled executables under a
+# simulator, for example:
+#
+# RunTest 3 sim "qemu-arm -s 8388608"
+#
+# There are two special cases where only one argument is allowed:
+#
+# If the first and only argument is "ebcdic", the script runs the special
+# EBCDIC test that can be useful for checking certain EBCDIC features, even
+# when run in an ASCII environment.
+#
+# If the script is obeyed as "RunTest list", a list of available tests is
+# output, but none of them are run.
+###############################################################################
+
+# Define test titles in variables so that they can be output as a list. Some
+# of them are modified (e.g. with -8 or -16) when used in the actual tests.
+
+title1="Test 1: Main functionality (Compatible with Perl >= 5.10)"
+title2="Test 2: API, errors, internals, and non-Perl stuff"
+#title3="Test 3: Locale-specific features"
+#title4A="Test 4: UTF"
+#title4B=" support (Compatible with Perl >= 5.10)"
+#title5="Test 5: API, internals, and non-Perl stuff for UTF"
+#title6="Test 6: Unicode property support (Compatible with Perl >= 5.10)"
+#title7="Test 7: API, internals, and non-Perl stuff for Unicode property support"
+#title8="Test 8: DFA matching main functionality"
+#title9="Test 9: DFA matching with UTF"
+#title10="Test 10: DFA matching with Unicode properties"
+#title11="Test 11: Internal offsets and code size tests"
+#title12="Test 12: JIT-specific features (when JIT is available)"
+#title13="Test 13: JIT-specific features (when JIT is not available)"
+#title14="Test 14: Specials for the basic 8-bit library"
+#title15="Test 15: Specials for the 8-bit library with UTF-8 support"
+#title16="Test 16: Specials for the 8-bit library with Unicode property support"
+#title17="Test 17: Specials for the basic 16/32-bit library"
+#title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
+#title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
+#title20="Test 20: DFA specials for the basic 16/32-bit library"
+#title21="Test 21: Reloads for the basic 16/32-bit library"
+#title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
+#title23="Test 23: Specials for the 16-bit library"
+#title24="Test 24: Specials for the 16-bit library with UTF-16 support"
+#title25="Test 25: Specials for the 32-bit library"
+#title26="Test 26: Specials for the 32-bit library with UTF-32 support"
+
+maxtest=2
+
+if [ $# -eq 1 -a "$1" = "list" ]; then
+  echo $title1
+  echo $title2 "(not UTF)"
+#  echo $title3
+#  echo $title4A $title4B
+#  echo $title5 support
+#  echo $title6
+#  echo $title7
+#  echo $title8
+#  echo $title9
+#  echo $title10
+#  echo $title11
+#  echo $title12
+#  echo $title13
+#  echo $title14
+#  echo $title15
+#  echo $title16
+#  echo $title17
+#  echo $title18
+#  echo $title19
+#  echo $title20
+#  echo $title21
+#  echo $title22
+#  echo $title23
+#  echo $title24
+#  echo $title25
+#  echo $title26
+  exit 0
+fi
+
+# Set up a suitable "diff" command for comparison. Some systems
+# have a diff that lacks a -u option. Try to deal with this.
+
+cf="diff"
+diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
+
+# Find the test data
+
+if [ -n "$srcdir" -a -d "$srcdir" ] ; then
+  testdata="$srcdir/testdata"
+elif [ -d "./testdata" ] ; then
+  testdata=./testdata
+elif [ -d "../testdata" ] ; then
+  testdata=../testdata
+else
+  echo "Cannot find the testdata directory"
+  exit 1
+fi
+
+
+# ------ Special EBCDIC Test -------
+
+if [ $# -eq 1 -a "$1" = "ebcdic" ]; then
+  ./pcre2test -C ebcdic >/dev/null
+  ebcdic=$?
+  if [ $ebcdic -ne 1 ] ; then
+    echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
+    exit 1
+  fi
+
+  for opt in "" "-dfa"; do
+    ./pcre2test -q $opt $testdata/testinputEBC >testtry
+    if [ $? = 0 ] ; then
+      $cf $testdata/testoutputEBC testtry
+      if [ $? != 0 ] ; then exit 1; fi
+    else exit 1
+    fi
+    if [ "$opt" = "-dfa" ] ; then echo "  OK using DFA"
+    else echo "  OK"
+    fi
+  done
+
+exit 0
+fi
+
+
+# ------ Normal Tests ------
+
+# Default values
+
+arg8=
+arg16=
+arg32=
+nojit=
+sim=
+skip=
+valgrind=
+
+# This is in case the caller has set aliases (as I do - PH)
+unset cp ls mv rm
+
+# Process options and select which tests to run; for those that are explicitly
+# requested, check that the necessary optional facilities are available.
+
+do1=no
+do2=no
+#do3=no
+#do4=no
+#do5=no
+#do6=no
+#do7=no
+#do8=no
+#do9=no
+#do10=no
+#do11=no
+#do12=no
+#do13=no
+#do14=no
+#do15=no
+#do16=no
+#do17=no
+#do18=no
+#do19=no
+#do20=no
+#do21=no
+#do22=no
+#do23=no
+#do24=no
+#do25=no
+#do26=no
+
+while [ $# -gt 0 ] ; do
+  case $1 in
+    1) do1=yes;;
+    2) do2=yes;;
+#    3) do3=yes;;
+#    4) do4=yes;;
+#    5) do5=yes;;
+#    6) do6=yes;;
+#    7) do7=yes;;
+#    8) do8=yes;;
+#    9) do9=yes;;
+#   10) do10=yes;;
+#   11) do11=yes;;
+#   12) do12=yes;;
+#   13) do13=yes;;
+#   14) do14=yes;;
+#   15) do15=yes;;
+#   16) do16=yes;;
+#   17) do17=yes;;
+#   18) do18=yes;;
+#   19) do19=yes;;
+#   20) do20=yes;;
+#   21) do21=yes;;
+#   22) do22=yes;;
+#   23) do23=yes;;
+#   24) do24=yes;;
+#   25) do25=yes;;
+#   26) do26=yes;;
+   -8) arg8=yes;;
+  -16) arg16=yes;;
+  -32) arg32=yes;;
+   nojit) nojit=yes;;
+   sim) shift; sim=$1;;
+   valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
+   valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
+   ~*)
+     if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
+       skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
+     else
+       echo "Unknown option or test selector '$1'"; exit 1
+     fi
+   ;;
+   *-*)
+     if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
+       tf=`expr "$1" : '\([0-9]*\)'`
+       tt=`expr "$1" : '.*-\([0-9]*\)'`
+       if [ "$tt" = "" ] ; then tt=$maxtest; fi
+       if expr \( "$tf" "<" 1 \) \| \( "$tt" ">" "$maxtest" \) >/dev/null; then
+         echo "Invalid test range '$1'"; exit 1
+       fi
+       while expr "$tf" "<=" "$tt" >/dev/null; do
+         eval do${tf}=yes
+         tf=`expr $tf + 1`
+       done
+     else
+       echo "Invalid test range '$1'"; exit 1
+     fi
+   ;;
+   *) echo "Unknown option or test selector '$1'"; exit 1;;
+  esac
+  shift
+done
+
+# Find which optional facilities are available.
+
+$sim ./pcre2test -C linksize >/dev/null
+link_size=$?
+if [ $link_size -lt 2 ] ; then
+  echo "Failed to find internal link size"
+  exit 1
+fi
+if [ $link_size -gt 4 ] ; then
+  echo "Failed to find internal link size"
+  exit 1
+fi
+
+# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
+# one need be.
+
+$sim ./pcre2test -C pcre8 >/dev/null
+support8=$?
+$sim ./pcre2test -C pcre16 >/dev/null
+support16=$?
+$sim ./pcre2test -C pcre32 >/dev/null
+support32=$?
+
+# Initialize all bitsizes skipped
+
+test8=skip
+test16=skip
+test32=skip
+
+# If no bitsize arguments, select all that are available
+
+if [ "$arg8$arg16$arg32" = "" ] ; then
+  if [ $support8 -ne 0 ] ; then
+    test8=
+  fi
+  if [ $support16 -ne 0 ] ; then
+    test16=-16
+  fi
+  if [ $support32 -ne 0 ] ; then
+    test32=-32
+  fi
+
+# Select requested bit sizes
+
+else
+  if [ "$arg8" = yes ] ; then
+    if [ $support8 -eq 0 ] ; then
+      echo "Cannot run 8-bit library tests: 8-bit library not compiled"
+      exit 1
+    fi
+    test8=
+  fi
+  if [ "$arg16" = yes ] ; then
+    if [ $support16 -eq 0 ] ; then
+      echo "Cannot run 16-bit library tests: 16-bit library not compiled"
+      exit 1
+    fi
+    test16=-16
+  fi
+  if [ "$arg32" = yes ] ; then
+    if [ $support32 -eq 0 ] ; then
+      echo "Cannot run 32-bit library tests: 32-bit library not compiled"
+      exit 1
+    fi
+    test32=-32
+  fi
+fi
+
+# UTF support always applies to all the bit sizes that are supported; we can't
+# have UTF-8 support without UTF-16 support (for example).
+
+$sim ./pcre2test -C utf >/dev/null
+utf=$?
+
+jitopt=
+$sim ./pcre2test -C jit >/dev/null
+jit=$?
+if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
+  jitopt=-jit
+fi
+
+# If no specific tests were requested, select all. Those that are not
+# relevant will be automatically skipped.
+
+if [ $do1  = no -a $do2  = no ]; then
+#     -a $do3  = no -a $do4  = no -a \
+#     $do5  = no -a $do6  = no -a $do7  = no -a $do8  = no -a \
+#     $do9  = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
+#     $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
+#     $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
+#     $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
+#     $do25 = no -a $do26 = no
+
+  do1=yes
+  do2=yes
+#  do3=yes
+#  do4=yes
+#  do5=yes
+#  do6=yes
+#  do7=yes
+#  do8=yes
+#  do9=yes
+#  do10=yes
+#  do11=yes
+#  do12=yes
+#  do13=yes
+#  do14=yes
+#  do15=yes
+#  do16=yes
+#  do17=yes
+#  do18=yes
+#  do19=yes
+#  do20=yes
+#  do21=yes
+#  do22=yes
+#  do23=yes
+#  do24=yes
+#  do25=yes
+#  do26=yes
+fi
+
+# Handle any explicit skips at this stage, so that an argument list may consist
+# only of explicit skips.
+
+for i in $skip; do eval do$i=no; done
+
+# Show which release and which test data
+
+echo ""
+echo PCRE2 C library tests using test data from $testdata
+$sim ./pcre2test /dev/null
+echo ""
+
+for bmode in "$test8" "$test16" "$test32"; do
+  case "$bmode" in
+    skip) continue;;
+    -16)  if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
+          bits=16; echo "---- Testing 16-bit library ----"; echo "";;
+    -32)  if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
+          bits=32; echo "---- Testing 32-bit library ----"; echo "";;
+    *)    bits=8; echo "---- Testing 8-bit library ----"; echo "";;
+  esac
+
+# Primary test, compatible with JIT and all versions of Perl >= 5.10
+
+if [ $do1 = yes ] ; then
+  echo $title1
+  for opt in "" $jitopt; do
+    $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry
+    if [ $? = 0 ] ; then
+      $cf $testdata/testoutput1 testtry
+      if [ $? != 0 ] ; then exit 1; fi
+    else exit 1
+    fi
+    if [ "$opt" = "-jit" ] ; then echo "  OK with JIT"
+    else echo "  OK"
+    fi
+  done
+fi
+
+# PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals
+
+if [ $do2 = yes ] ; then
+  echo $title2 "(not UTF-$bits)"
+  for opt in "" $jitopt; do
+    $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry
+    if [ $? = 0 ] ; then
+      $cf $testdata/testoutput2 testtry
+      if [ $? != 0 ] ; then exit 1; fi
+    else
+      echo " "
+      echo "** Test 2 requires a lot of stack. If it has crashed with a"
+      echo "** segmentation fault, it may be that you do not have enough"
+      echo "** stack available by default. Please see the 'pcre2stack' man"
+      echo "** page for a discussion of PCRE2's stack usage."
+      echo " "
+      exit 1
+    fi
+    if [ "$opt" = "-jit" ] ; then echo "  OK with JIT"
+    else echo "  OK"
+    fi
+  done
+fi
+
+## Locale-specific tests, provided that either the "fr_FR" or the "french"
+## locale is available. The former is the Unix-like standard; the latter is
+## for Windows. Another possibility is "fr". Unfortunately, different versions
+## of the French locale give different outputs for some items. This test passes
+## if the output matches any one of the alternative output files.
+#
+#if [ $do3 = yes ] ; then
+#  locale -a | grep '^fr_FR$' >/dev/null
+#  if [ $? -eq 0 ] ; then
+#    locale=fr_FR
+#    infile=$testdata/testinput3
+#    outfile=$testdata/testoutput3
+#    outfile2=$testdata/testoutput3A
+#    outfile3=$testdata/testoutput3B
+#  else
+#    infile=test3input
+#    outfile=test3output
+#    outfile2=test3outputA
+#    outfile3=test3outputB
+#    locale -a | grep '^french$' >/dev/null
+#    if [ $? -eq 0 ] ; then
+#      locale=french
+#      sed 's/fr_FR/french/' $testdata/testinput3 >test3input
+#      sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
+#      sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA
+#      sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB
+#    else
+#      locale -a | grep '^fr$' >/dev/null
+#      if [ $? -eq 0 ] ; then
+#        locale=fr
+#        sed 's/fr_FR/fr/' $testdata/intestinput3 >test3input
+#        sed 's/fr_FR/fr/' $testdata/intestoutput3 >test3output
+#        sed 's/fr_FR/fr/' $testdata/intestoutput3A >test3outputA
+#        sed 's/fr_FR/fr/' $testdata/intestoutput3B >test3outputB
+#      else
+#        locale=
+#      fi
+#    fi
+#  fi
+#
+#  if [ "$locale" != "" ] ; then
+#    echo $title3 "(using '$locale' locale)"
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
+#      if [ $? = 0 ] ; then
+#        if $cf $outfile testtry >teststdout || \
+#           $cf $outfile2 testtry >teststdout || \
+#           $cf $outfile3 testtry >teststdout
+#        then
+#          if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#          elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#          else echo "  OK"
+#          fi
+#        else
+#          echo "** Locale test did not run successfully. The output did not match"
+#          echo "   $outfile, $outfile2 or $outfile3."
+#          echo "   This may mean that there is a problem with the locale settings rather"
+#          echo "   than a bug in PCRE."
+#          exit 1
+#        fi
+#      else exit 1
+#      fi
+#    done
+#  else
+#    echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr' or"
+#    echo "'french' locales exist, or the \"locale\" command is not available"
+#    echo "to check for them."
+#    echo " "
+#  fi
+#fi
+#
+## Additional tests for UTF support
+#
+#if [ $do4 = yes ] ; then
+#  echo ${title4A}-${bits}${title4B}
+#  if [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput4 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+#if [ $do5 = yes ] ; then
+#  echo ${title5}-${bits} support
+#  if [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput5 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+#if [ $do6 = yes ] ; then
+#  echo $title6
+#  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
+#    echo "  Skipped because Unicode property support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput6 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput6 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Test non-Perl-compatible Unicode property support
+#
+#if [ $do7 = yes ] ; then
+#  echo $title7
+#  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
+#    echo "  Skipped because Unicode property support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput7 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for DFA matching support
+#
+#if [ $do8 = yes ] ; then
+#  echo $title8
+#  for opt in "" "-s"; do
+#    $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput8 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput8 testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
+#  done
+#fi
+#
+#if [ $do9 = yes ] ; then
+#  echo ${title9}-${bits}
+#  if [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  else
+#    for opt in "" "-s"; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput9 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput9 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
+#    done
+#  fi
+#fi
+#
+#if [ $do10 = yes ] ; then
+#  echo $title10
+#  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
+#    echo "  Skipped because Unicode property support is not available"
+#  else
+#    for opt in "" "-s"; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput10 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput10 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
+#    done
+#  fi
+#fi
+#
+## Test of internal offsets and code sizes. This test is run only when there
+## is Unicode property support and the link size is 2. The actual tests are
+## mostly the same as in some of the above, but in this test we inspect some
+## offsets and sizes that require a known link size. This is a doublecheck for
+## the maintainer, just in case something changes unexpectedly. The output from
+## this test is not the same in 8-bit and 16-bit modes.
+#
+#if [ $do11 = yes ] ; then
+#  echo $title11
+#  if [ $link_size -ne 2 ] ; then
+#    echo "  Skipped because link size is not 2"
+#  elif [ $ucp -eq 0 ] ; then
+#    echo "  Skipped because Unicode property support is not available"
+#  else
+#    for opt in "" "-s"; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput11-$bits testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
+#    done
+#  fi
+#fi
+#
+## Test JIT-specific features when JIT is available
+#
+#if [ $do12 = yes ] ; then
+#  echo $title12
+#  if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
+#    echo "  Skipped because JIT is not available or not usable"
+#  else
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput12 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput12 testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+#
+## Test JIT-specific features when JIT is not available
+#
+#if [ $do13 = yes ] ; then
+#  echo $title13
+#  if [ $jit -ne 0 ] ; then
+#    echo "  Skipped because JIT is available"
+#  else
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput13 testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+#
+## Tests for 8-bit-specific features
+#
+#if [ "$do14" = yes ] ; then
+#  echo $title14
+#  if [ "$bits" = "16" -o "$bits" = "32" ] ; then
+#    echo "  Skipped when running 16/32-bit tests"
+#  else
+#    cp -f $testdata/saved16 testsaved16
+#    cp -f $testdata/saved32 testsaved32
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput14 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput14 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for 8-bit-specific features (needs UTF-8 support)
+#
+#if [ "$do15" = yes ] ; then
+#  echo $title15
+#  if [ "$bits" = "16" -o "$bits" = "32" ] ; then
+#    echo "  Skipped when running 16/32-bit tests"
+#  elif [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput15 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput15 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for 8-bit-specific features (Unicode property support)
+#
+#if [ $do16 = yes ] ; then
+#  echo $title16
+#  if [ "$bits" = "16" -o "$bits" = "32" ] ; then
+#    echo "  Skipped when running 16/32-bit tests"
+#  elif [ $ucp -eq 0 ] ; then
+#    echo "  Skipped because Unicode property support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput16 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput16 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for 16/32-bit-specific features
+#
+#if [ $do17 = yes ] ; then
+#  echo $title17
+#  if [ "$bits" = "8" ] ; then
+#    echo "  Skipped when running 8-bit tests"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput17 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput17 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for 16/32-bit-specific features (UTF-16/32 support)
+#
+#if [ $do18 = yes ] ; then
+#  echo $title18
+#  if [ "$bits" = "8" ] ; then
+#    echo "  Skipped when running 8-bit tests"
+#  elif [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput18 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput18-$bits testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for 16/32-bit-specific features (Unicode property support)
+#
+#if [ $do19 = yes ] ; then
+#  echo $title19
+#  if [ "$bits" = "8" ] ; then
+#    echo "  Skipped when running 8-bit tests"
+#  elif [ $ucp -eq 0 ] ; then
+#    echo "  Skipped because Unicode property support is not available"
+#  else
+#    for opt in "" "-s" $jitopt; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput19 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput19 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
+#
+#if [ $do20 = yes ] ; then
+#  echo $title20
+#  if [ "$bits" = "8" ] ; then
+#    echo "  Skipped when running 8-bit tests"
+#  else
+#    for opt in "" "-s"; do
+#      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput20 testtry
+#      if [ $? = 0 ] ; then
+#        $cf $testdata/testoutput20 testtry
+#        if [ $? != 0 ] ; then exit 1; fi
+#      else exit 1
+#      fi
+#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
+#      else echo "  OK"
+#      fi
+#    done
+#  fi
+#fi
+#
+## Tests for reloads with 16/32-bit library
+#
+#if [ $do21 = yes ] ; then
+#  echo $title21
+#  if [ "$bits" = "8" ] ; then
+#    echo "  Skipped when running 8-bit tests"
+#  elif [ $link_size -ne 2 ] ; then
+#    echo "  Skipped because link size is not 2"
+#  else
+#    cp -f $testdata/saved8 testsaved8
+#    cp -f $testdata/saved16LE-1 testsaved16LE-1
+#    cp -f $testdata/saved16BE-1 testsaved16BE-1
+#    cp -f $testdata/saved32LE-1 testsaved32LE-1
+#    cp -f $testdata/saved32BE-1 testsaved32BE-1
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput21 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput21-$bits testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+#
+## Tests for reloads with 16/32-bit library (UTF-16 support)
+#
+#if [ $do22 = yes ] ; then
+#  echo $title22
+#  if [ "$bits" = "8" ] ; then
+#    echo "  Skipped when running 8-bit tests"
+#  elif [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  elif [ $link_size -ne 2 ] ; then
+#    echo "  Skipped because link size is not 2"
+#  else
+#    cp -f $testdata/saved16LE-2 testsaved16LE-2
+#    cp -f $testdata/saved16BE-2 testsaved16BE-2
+#    cp -f $testdata/saved32LE-2 testsaved32LE-2
+#    cp -f $testdata/saved32BE-2 testsaved32BE-2
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput22 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput22-$bits testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+#
+#if [ $do23 = yes ] ; then
+#  echo $title23
+#  if [ "$bits" = "8" -o "$bits" = "32" ] ; then
+#    echo "  Skipped when running 8/32-bit tests"
+#  else
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput23 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput23 testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+#
+#if [ $do24 = yes ] ; then
+#  echo $title24
+#  if [ "$bits" = "8" -o "$bits" = "32" ] ; then
+#    echo "  Skipped when running 8/32-bit tests"
+#  elif [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  else
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput24 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput24 testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+#
+#if [ $do25 = yes ] ; then
+#  echo $title25
+#  if [ "$bits" = "8" -o "$bits" = "16" ] ; then
+#    echo "  Skipped when running 8/16-bit tests"
+#  else
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput25 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput25 testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+#
+#if [ $do26 = yes ] ; then
+#  echo $title26
+#  if [ "$bits" = "8" -o "$bits" = "16" ] ; then
+#    echo "  Skipped when running 8/16-bit tests"
+#  elif [ $utf -eq 0 ] ; then
+#    echo "  Skipped because UTF-$bits support is not available"
+#  else
+#    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput26 testtry
+#    if [ $? = 0 ] ; then
+#      $cf $testdata/testoutput26 testtry
+#      if [ $? != 0 ] ; then exit 1; fi
+#    else exit 1
+#    fi
+#    echo "  OK"
+#  fi
+#fi
+
+# End of loop for 8/16/32-bit tests
+done
+
+# Clean up local working files
+rm -f test3input test3output test3outputA testNinput testsaved* teststderr teststdout testtry
+
+# End
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "05 July 2014" "PCRE 10.00"
+.TH PCRE2TEST 1 "22 July 2014" "PCRE 10.00"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -141,6 +141,10 @@ Output a brief summary these options and then exit.
 Behave as if each pattern has the \fB/info\fP modifier; information about the
 compiled pattern is given after compilation.
 .TP 10
+\fB-jit\fP
+Behave as if each pattern line has the \fBjit\fP modifier; after successful
+compilation, each pattern is passed to the just-in-time compiler, if available.
+.TP 10
 \fB-pattern\fB \fImodifier-list\fP
 Behave as if each pattern line contains the given modifiers.
 .TP 10
@ -216,6 +220,17 @@ In between sets of test data, a line that begins with a hash (#) character is
 interpreted as a command line. If the first character is followed by white
 space or an exclamation mark, the line is treated as a comment, and ignored.
 Otherwise, the following commands are recognized:
+.sp
+  #forbid_utf
+.sp
+Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
+options set, which locks out the use of UTF and Unicode property features. This
+is a trigger guard that is used in test files to ensure that UTF/Unicode tests
+are not accidentally added to files that are used when UTF support is not
+included in the library. This effect can also be obtained by the use of
+\fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be unset, and
+the automatic options are not displayed in pattern information, to avoid
+cluttering up test output.
 .sp
  #load <file name>
 .sp
@ -358,10 +373,11 @@ the start of a modifier list. For example:
 .sp
  abc\=notbol,notempty
 .sp
-A backslash followed by anything else causes an error. However, if the very
-last character in the line is a backslash (and there is no modifier list), it
-is ignored. This gives a way of passing an empty line as data, since a real
-empty line terminates the data input.
+A backslash followed by any other non-alphanumeric character just escapes that
+character. A backslash followed by anything else causes an error. However, if
+the very last character in the line is a backslash (and there is no modifier
+list), it is ignored. This gives a way of passing an empty line as data, since
+a real empty line terminates the data input.
 .
 .
 .SH "PATTERN MODIFIERS"
@ -594,14 +610,17 @@ below. All other modifiers cause an error.
 .rs
 .sp
 The \fB/stackguard\fP modifier is used to test the use of
-\fBpcre2_stack_guard\fP. It must be followed by '0' or '1', specifying the
-return code to be given from an external function that is passed to PCRE2 and
-used for stack checking during compilation (see the
+\fBpcre2_set_compile_recursion_guard()\fP, a function that is provided to
+enable stack availability to be checked during compilation (see the
 .\" HREF
 \fBpcre2api\fP
 .\"
-documentation for details). FIXME: this needs doing properly once the test is 
-implemented. Mention nested parens limit.
+documentation for details). If the number specified by the modifier is greater
+than zero, \fBpcre2_set_compile_recursion_guard()\fP is called to set up a
+callback from \fBpcre2_compile()\fP to a local function. The argument it is
+passed is the current parenthesis nesting depth; if this is greater than the
+value given by the modifier, non-zero is returned, causing the compilation to
+be aborted.
 .
 .
 .SS "Using alternative character tables"
@ -1210,6 +1229,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 05 July 2014
+Last updated: 22 July 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi
--- a/perltest.pl
+++ b/perltest.pl
@ -0,0 +1,247 @@
+#! /usr/bin/env perl
+
+# Program for testing regular expressions with perl to check that PCRE2 handles
+# them the same. This version needs to have "use utf8" at the start for running
+# the UTF-8 tests, but *not* for the other tests. The only way I've found for
+# doing this is to cat this line in explicitly in the RunPerlTest script. I've
+# also used this method to supply "require Encode" for the UTF-8 tests, so that
+# the main test will still run where Encode is not installed.
+
+#use utf8;
+#require Encode;
+
+# Convert a string into printable form. Characters in the range 32-126 are
+# kept as they are; anything else becomes a hex escape, written as \x{..} when
+# the global $utf8 is true and as \x.. otherwise.
+
+sub pchars {
+my($out) = "";
+
+if ($utf8)
+  {
+  # unpack('U*') yields one number per character of the argument.
+  foreach my $cp (unpack('U*', $_[0]))
+    {
+    $out .= ($cp >= 32 && $cp < 127)? chr($cp) : sprintf("\\x{%02x}", $cp);
+    }
+  }
+else
+  {
+  foreach my $ch (split(//, $_[0]))
+    {
+    my $code = ord $ch;
+    $out .= ($code >= 32 && $code < 127)? $ch : sprintf("\\x%02x", $code);
+    }
+  }
+
+return $out;
+}
+
+
+# Read lines from named file or stdin and write to named file or stdout; lines
+# consist of a regular expression, in delimiters and optionally followed by
+# options, followed by a set of test data, terminated by an empty line.
+
+# Sort out the input and output files. The first argument, if present, names
+# the input file and the second the output file; otherwise STDIN and STDOUT
+# are used. The handle names are kept in $infile and $outfile. The
+# three-argument form of open is used so that the file names are never
+# interpreted as part of the open mode.
+
+if (@ARGV > 0)
+  {
+  open(INFILE, "<", $ARGV[0]) || die "Failed to open $ARGV[0]\n";
+  $infile = "INFILE";
+  }
+else { $infile = "STDIN"; }
+
+if (@ARGV > 1)
+  {
+  open(OUTFILE, ">", $ARGV[1]) || die "Failed to open $ARGV[1]\n";
+  $outfile = "OUTFILE";
+  }
+else { $outfile = "STDOUT"; }
+
+printf($outfile "Perl $] Regular Expressions\n\n");
+
+# Main loop: each iteration reads one pattern (which may span several lines),
+# converts its pcre2test modifiers into Perl ones, and then applies the
+# pattern to each following data line, up to the next empty line.
+#
+# Input text and error messages are echoed with print rather than printf: they
+# are data, so a % in them must not be taken as a format directive.
+
+NEXT_RE:
+for (;;)
+  {
+  printf "  re> " if $infile eq "STDIN";
+  last if ! ($_ = <$infile>);
+  print $outfile "$_" if $infile ne "STDIN";
+  next if ($_ =~ /^\s*$/ || $_ =~ /^#/);
+
+  $pattern = $_;
+
+  # Keep reading lines until the opening delimiter has appeared a second time.
+
+  while ($pattern !~ /^\s*(.).*\1/s)
+    {
+    printf "    > " if $infile eq "STDIN";
+    last if ! ($_ = <$infile>);
+    print $outfile "$_" if $infile ne "STDIN";
+    $pattern .= $_;
+    }
+
+  chomp($pattern);
+  $pattern =~ s/\s+$//;
+
+  # Split the pattern from the modifiers and adjust them as necessary.
+
+  $pattern =~ /^\s*((.).*\2)(.*)$/s;
+  $pat = $1;
+  $mod = $3;
+
+  # "allaftertext" is used by pcre2test to print remainders after captures.
+  # It must be removed before "aftertext" is looked for, because it contains
+  # that word.
+
+  $mod =~ s/allaftertext,?//;
+
+  # The private "aftertext" modifier means "print $' afterwards".
+
+  $showrest = ($mod =~ s/aftertext,?//);
+
+  # Detect utf
+
+  $utf8 = $mod =~ s/utf,?//;
+
+  # Remove "dupnames".
+
+  $mod =~ s/dupnames,?//;
+
+  # Remove "mark" (asks pcre2test to check MARK data)
+
+  $mod =~ s/mark,?//;
+
+  # "ucp" asks pcre2test to set PCRE2_UCP; change this to /u for Perl
+
+  $mod =~ s/ucp,?/u/;
+
+  # Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2 optimizations)
+
+  $mod =~ s/no_auto_possess,?//;
+  $mod =~ s/no_start_optimize,?//;
+
+  # Add back retained modifiers and check that the pattern is valid.
+
+  $mod =~ s/,//g;
+  $pattern = "$pat$mod";
+  eval "\$_ =~ ${pattern}";
+  if ($@)
+    {
+    print $outfile "Error: $@";
+
+    # When reading from a file, skip the data lines that belong to the failed
+    # pattern. The handle names are strings, so they must be compared with
+    # "ne"; a numeric comparison is never true for them.
+
+    if ($infile ne "STDIN")
+      {
+      for (;;)
+        {
+        last if ! ($_ = <$infile>);
+        last if $_ =~ /^\s*$/;
+        }
+      }
+    next NEXT_RE;
+    }
+
+  # If the /g modifier is present, we want to put a loop round the matching;
+  # otherwise just a single "if".
+
+  $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
+
+  # If the pattern is actually the null string, Perl uses the most recently
+  # executed (and successfully compiled) regex instead. This is a nasty trap
+  # for the unwary! The PCRE2 test suite does contain null strings in places -
+  # if they are allowed through here all sorts of weird and unexpected effects
+  # happen. To avoid this, we replace such patterns with a non-null pattern
+  # that has the same effect.
+
+  $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
+
+  # Read data lines and test them
+
+  for (;;)
+    {
+    printf "data> " if $infile eq "STDIN";
+    last NEXT_RE if ! ($_ = <$infile>);
+    chomp;
+    print $outfile "$_\n" if $infile ne "STDIN";
+
+    s/\s+$//;  # Remove trailing space
+    s/^\s+//;  # Remove leading space
+    s/\\Y//g;  # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
+
+    last if ($_ eq "");
+
+    # NOTE(review): the data line is evaluated as a double-quoted Perl string
+    # to get escapes processed, so it is also subject to interpolation. Only
+    # trusted test files should be fed to this script.
+
+    $x = eval "\"$_\"";
+
+    # Empty array for holding results, ensure $REGERROR and $REGMARK are
+    # unset, then do the matching. Each successful match pushes 18 values:
+    # $&, $1 to $16, and $'.
+
+    @subs = ();
+
+    $pushes = "push \@subs,\$&;" .
+         "push \@subs,\$1;" .
+         "push \@subs,\$2;" .
+         "push \@subs,\$3;" .
+         "push \@subs,\$4;" .
+         "push \@subs,\$5;" .
+         "push \@subs,\$6;" .
+         "push \@subs,\$7;" .
+         "push \@subs,\$8;" .
+         "push \@subs,\$9;" .
+         "push \@subs,\$10;" .
+         "push \@subs,\$11;" .
+         "push \@subs,\$12;" .
+         "push \@subs,\$13;" .
+         "push \@subs,\$14;" .
+         "push \@subs,\$15;" .
+         "push \@subs,\$16;" .
+         "push \@subs,\$'; }";
+
+    undef $REGERROR;
+    undef $REGMARK;
+
+    eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
+
+    if ($@)
+      {
+      print $outfile "Error: $@\n";
+      next NEXT_RE;
+      }
+    elsif (scalar(@subs) == 0)
+      {
+      printf $outfile "No match";
+      if (defined $REGERROR && $REGERROR != 1)
+        { printf $outfile (", mark = %s", &pchars($REGERROR)); }
+      printf $outfile "\n";
+      }
+    else
+      {
+      # Output one group of 18 values for each match that was found.
+
+      while (scalar(@subs) != 0)
+        {
+        printf $outfile (" 0: %s\n", &pchars($subs[0]));
+        printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
+        $last_printed = 0;
+        for ($i = 1; $i <= 16; $i++)
+          {
+          if (defined $subs[$i])
+            {
+            while ($last_printed++ < $i-1)
+              { printf $outfile ("%2d: <unset>\n", $last_printed); }
+            printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
+            $last_printed = $i;
+            }
+          }
+        splice(@subs, 0, 18);
+        }
+
+      # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
+      # set and the input pattern was a UTF-8 string. We can, however, force
+      # it to be so marked.
+
+      if (defined $REGMARK && $REGMARK != 1)
+        {
+        $xx = $REGMARK;
+        $xx = Encode::decode_utf8($xx) if $utf8;
+        printf $outfile ("MK: %s\n", &pchars($xx));
+        }
+      }
+    }
+  }
+
+# printf $outfile "\n";
+
+# End
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -561,7 +561,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
       ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, 
       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, 
       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, 
-       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77 }; 
+       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 }; 

 /* This is a table of start-of-pattern options such as (*UTF) and settings such
 as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -1703,10 +1703,10 @@ else
      ptr += 4;
      if (utf)
        {
-        if (c > 0x10ffffU) *errorcodeptr = ERR76;
+        if (c > 0x10ffffU) *errorcodeptr = ERR77;
          else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;  
        }
-      else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR76; 
+      else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; 
      }
    break;

@ -1815,12 +1815,11 @@ else
    recommended to avoid the ambiguities in the old syntax.

    Outside a character class, the digits are read as a decimal number. If the
-    number is less than 8 (used to be 10), or if there are that many previous
-    extracting left brackets, then it is a back reference. Otherwise, up to
-    three octal digits are read to form an escaped byte. Thus \123 is likely to
-    be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
-    the octal value is greater than 377, the least significant 8 bits are
-    taken. \8 and \9 are treated as the literal characters 8 and 9.
+    number is less than 10, or if there are that many previous extracting left
+    brackets, it is a back reference. Otherwise, up to three octal digits are
+    read to form an escaped byte. Thus \123 is likely to be octal 123 (cf
+    \0123, which is octal 012 followed by the literal 3). If the octal value is
+    greater than 377, the least significant 8 bits are taken.

    Inside a character class, \ followed by a digit is always either a literal
    8 or 9 or an octal number. */
@ -1832,7 +1831,7 @@ else
      {
      oldptr = ptr;
      /* The integer range is limited by the machine's int representation. */
-      s = (int)(c -CHAR_0);
+      s = (int)(c - CHAR_0);
      overflow = FALSE;
      while (IS_DIGIT(ptr[1]))
        {
@ -1849,7 +1848,7 @@ else
        *errorcodeptr = ERR61;
        break;
        }
-      if (s < 8 || s <= cb->bracount)  /* Check for back reference */
+      if (s < 10 || s <= cb->bracount)  /* Check for back reference */
        {
        escape = -s;
        break;
@ -1886,7 +1885,7 @@ else

    case CHAR_o:
    if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else
-    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR77; else 
+    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else 
      {
      ptr += 2;
      c = 0;
@ -1947,7 +1946,7 @@ else
        ptr += 2;
        if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
          {
-          *errorcodeptr = ERR77;
+          *errorcodeptr = ERR78;
          break;
          }    
        c = 0;
@ -1955,12 +1954,12 @@ else
        
        while ((cc = XDIGIT(*ptr)) != 0xff)
          {
+          ptr++;
          if (c == 0 && cc == 0) continue;   /* Leading zeroes */
 #if PCRE2_CODE_UNIT_WIDTH == 32
          if (c >= 0x10000000l) { overflow = TRUE; break; }
 #endif
          c = (c << 4) | cc;
-          ptr++;
          if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR))
            {
            overflow = TRUE;
@ -2002,9 +2001,9 @@ else
    break;

    /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
-    An error is given if the byte following \c is not an ASCII character. This
-    coding is ASCII-specific, but then the whole concept of \cx is
-    ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
+    An error is given if the byte following \c is not a printable ASCII
+    character. This coding is ASCII-specific, but then the whole concept of \cx
+    is ASCII-specific. (However, an EBCDIC equivalent has now been added.) */

    case CHAR_c:
    c = *(++ptr);
@ -2014,7 +2013,7 @@ else
      break;
      }
 #ifndef EBCDIC    /* ASCII/UTF-8 coding */
-    if (c > 127)  /* Excludes all non-ASCII in either mode */
+    if (c < 32 || c > 126)  /* Excludes all non-printable ASCII */
      {
      *errorcodeptr = ERR68;
      break;
@ -3820,7 +3819,7 @@ for (;; ptr++)
          {
          ptr += 2;
          if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
-            { ptr += 2; goto CONTINUE_CLASS; }
+            { ptr += 2; continue; }
          inescq = TRUE;
          break;
          }
@ -4981,7 +4980,7 @@ for (;; ptr++)
        arglen = (int)(ptr - arg);
        if ((unsigned int)arglen > MAX_MARK)
          {
-          *errorcodeptr = ERR75;
+          *errorcodeptr = ERR76;
          goto FAILED;
          }
        }
@ -6548,10 +6547,9 @@ Returns:            TRUE on success
 static BOOL
 compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr,
  int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipunits,
-  int cond_depth,
-  uint32_t *firstcuptr, int32_t *firstcuflagsptr,
-  uint32_t *reqcuptr, int32_t *reqcuflagsptr,
-  branch_chain *bcptr, compile_block *cb, size_t *lengthptr)
+  int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
+  uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr, 
+  compile_block *cb, size_t *lengthptr)
 {
 PCRE2_SPTR ptr = *ptrptr;
 PCRE2_UCHAR *code = *codeptr;
@ -6569,15 +6567,13 @@ unsigned int orig_bracount;
 unsigned int max_bracount;
 branch_chain bc;

-#ifdef FIXME
 /* If set, call the external function that checks for stack availability. */

-if (ccontext->stack_guard != NULL && ccontext->stack_guard(0))
+if (cb->cx->stack_guard != NULL && cb->cx->stack_guard(cb->parens_depth))
  {
  *errorcodeptr= ERR33;
  return FALSE;
  }
-#endif   

 /* Miscellaneous initialization */

@ -7434,7 +7430,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
          if (c > UINT32_MAX / 10 - 1) break;   /* Integer overflow */
          c = c*10 + ptr[pp++] - CHAR_0;
          }
-        if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) goto END_PSO;
+        if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) 
+          {
+          errorcode = ERR60; 
+          goto HAD_ERROR;
+          } 
        if (p->type == PSO_LIMM) limit_match = c;
          else limit_recursion = c;
        skipatstart += pp - skipatstart;
@ -7443,12 +7443,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
      break;   /* Out of the table scan loop */   
      }
    }
-  if (i > sizeof(pso_list)/sizeof(pso)) break;   /* Out of pso loop */
+  if (i >= sizeof(pso_list)/sizeof(pso)) break;   /* Out of pso loop */
  }

 /* End of pattern-start options; advance to start of real regex. */

-END_PSO:
 ptr += skipatstart;

 /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
@ -7477,6 +7476,15 @@ if (utf)
    goto HAD_ERROR;
  }   
  
+/* Check UCP lockout. */
+
+if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) == 
+    (PCRE2_UCP|PCRE2_NEVER_UCP))
+  {
+  errorcode = ERR75;
+  goto HAD_ERROR;
+  }       
+
 /* Process the BSR setting. */

 if (bsr == 0) bsr = ccontext->bsr_convention;
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@ -148,15 +148,16 @@ static const char compile_error_texts[] =
  "different names for subpatterns of the same number are not allowed\0"
  "(*MARK) must have an argument\0"
  "non-hex character in \\x{} (closing brace missing?)\0"
-  "\\c must be followed by an ASCII character\0"
+  "\\c must be followed by a printable ASCII character\0"
  "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
  /* 70 */
  "internal error: unknown opcode in find_fixedlength()\0"
  "\\N is not supported in a class\0"
  "too many forward references\0"
  "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
-  "using (*UTF) is disabled by the application\0"
+  "using UTF is disabled by the application\0"
  /* 75 */
+  "using UCP is disabled by the application\0"
  "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
  "character code point value in \\u.... sequence is too large\0"
  "digits missing in \\x{} or \\o{}\0" 
@ -223,7 +224,7 @@ static const char match_error_texts[] =
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
-  "unknown substring\0" 
+  "unknown or unset substring\0" 
  /* 50 */ 
  "NULL argument passed\0"
  "nested recursion at the same subject position\0"
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@ -6782,6 +6782,12 @@ ENDLOOP:
 release_match_heapframes(&frame_zero, mb);
 #endif

+/* Fill in fields that are always returned in the match data. */
+
+match_data->code = re;
+match_data->subject = subject;
+match_data->mark = mb->mark;
+
 /* Handle a fully successful match. */

 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
@ -6842,25 +6848,26 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
    match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
    }
    
-  /* Fill in the remaining fields that are returned in the match data. */
+  /* Set the remaining returned values */

-  match_data->code = re;
-  match_data->subject = subject;
  match_data->leftchar = mb->start_used_ptr - subject;
  match_data->rightchar = 0;  /* FIXME */
  match_data->startchar = start_match - subject;
-  match_data->mark = mb->mark;
  return match_data->rc;
  }

 /* Control gets here if there has been a partial match, an error, or if the
-overall match attempt has failed at all permitted starting positions. For
-anything other than nomatch or partial match, just return the code. */
+overall match attempt has failed at all permitted starting positions. Any mark 
+data is in the nomatch_mark field. */
+
+match_data->mark = mb->nomatch_mark;
+
+/* For anything other than nomatch or partial match, just return the code. */

 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL)
  match_data->rc = rc;

-/* Handle a partial match. */
+/* Else handle a partial match. */

 else if (match_partial != NULL)
  {
@ -6870,16 +6877,16 @@ else if (match_partial != NULL)
    match_data->ovector[1] = end_subject - subject;
    }
  match_data->leftchar = start_partial - subject;
+  match_data->rightchar = 0;  /* FIXME */
+  match_data->startchar = match_partial - subject;
  match_data->rc = PCRE2_ERROR_PARTIAL;
  }

-/* This is the classic nomatch case. */
+/* Else this is the classic nomatch case. */

-else
-  {
-  match_data->rc = PCRE2_ERROR_NOMATCH;
-  match_data->mark = mb->nomatch_mark;
-  }
+else match_data->rc = PCRE2_ERROR_NOMATCH;
+
+/* Free any temporary offsets. */

 if (using_temporary_offsets)
  mb->memctl.free(mb->ovector, mb->memctl.memory_data);
--- a/src/pcre2_substring.c
+++ b/src/pcre2_substring.c
@ -119,6 +119,7 @@ size_t left, right;
 size_t p = 0;
 PCRE2_SPTR subject = match_data->subject;
 if (stringnumber >= match_data->oveccount ||
+    stringnumber > match_data->code->top_bracket ||
    (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
  return PCRE2_ERROR_NOSUBSTRING;
 right = match_data->ovector[stringnumber*2+1];
@ -203,6 +204,7 @@ PCRE2_UCHAR *yield;

 PCRE2_SPTR subject = match_data->subject;
 if (stringnumber >= match_data->oveccount ||
+    stringnumber > match_data->code->top_bracket ||
    (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
  return PCRE2_ERROR_NOSUBSTRING;
 right = match_data->ovector[stringnumber*2+1];
@ -293,6 +295,7 @@ pcre2_substring_length_bynumber(pcre2_match_data *match_data,
  int stringnumber)
 {
 if (stringnumber >= match_data->oveccount ||
+    stringnumber > match_data->code->top_bracket ||
    match_data->ovector[stringnumber*2] == PCRE2_UNSET)
  return PCRE2_ERROR_NOSUBSTRING;
 return match_data->ovector[stringnumber*2 + 1] -
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -46,7 +46,6 @@ POSSIBILITY OF SUCH DAMAGE.
 . save code and #load
 . JIT - compile, time, verify
 . memory handling testing
-. stackguard testing
 */


@ -435,7 +434,7 @@ static modstruct modlist[] = {
  { "dfa_shortest",        MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,        DO(options) },
  { "dollar_endonly",      MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,      PO(options) },
  { "dotall",              MOD_PATP, MOD_OPT, PCRE2_DOTALL,              PO(options) },
-  { "dupnames",            MOD_PAT,  MOD_OPT, PCRE2_DUPNAMES,            PO(options) },
+  { "dupnames",            MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,            PO(options) },
  { "extended",            MOD_PATP, MOD_OPT, PCRE2_EXTENDED,            PO(options) },
  { "find_limits",         MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,            DO(control) },
  { "firstline",           MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,           PO(options) },
@ -612,6 +611,7 @@ clock_t total_compile_time = 0;
 clock_t total_match_time = 0;

 static uint32_t dfa_matched;
+static uint32_t forbid_utf = 0;
 static uint32_t max_oveccount;
 static uint32_t callout_count;

@ -831,6 +831,14 @@ are supported. */
  else \
    pcre2_set_character_tables_32(G(a,32),b)
    
+#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
+  if (test_mode == PCRE8_MODE) \
+    pcre2_set_compile_recursion_guard_8(G(a,8),b); \
+  else if (test_mode == PCRE16_MODE) \
+    pcre2_set_compile_recursion_guard_16(G(a,16),b); \
+  else \
+    pcre2_set_compile_recursion_guard_32(G(a,32),b)
+
 #define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(a,8),b); \
@ -1102,6 +1110,12 @@ the three different cases. */
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)

+#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
+  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+    G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b); \
+  else \
+    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b)
+
 #define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
@ -1245,8 +1259,10 @@ the three different cases. */
 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
 #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
 #define PCRE2_SET_CALLOUT(a,b,c) \
-  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c);
+  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c)
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
+#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
+  pcre2_set_compile_recursion_guard_8(G(a,8),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
@ -1304,12 +1320,14 @@ the three different cases. */
 #define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *))b,c);
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
+#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
+  pcre2_set_compile_recursion_guard_16(G(a,16),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
-  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e);
+  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
-  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e);
+  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d) \
  a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d)
@ -1361,10 +1379,12 @@ the three different cases. */
 #define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *))b,c);
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
+#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
+  pcre2_set_compile_recursion_guard_32(G(a,32),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
-  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e);
+  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e);
 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
@ -1766,6 +1786,25 @@ free(block);
 #endif  /* NO_RECURSE */


+/*************************************************
+*       Callback function for stack guard        *
+*************************************************/
+
+/* Compile-time stack guard. pcre2_compile() is set up to call this when the
+stackguard=n modifier gives a value greater than zero. The check applied is
+whether the parenthesis nesting depth exceeds the value set by the modifier.
+
+Argument:  the current parenthesis nesting depth
+Returns:   non-zero to kill the compilation
+*/
+
+static int
+stack_guard(uint32_t depth)
+{
+return (depth > pat_patctl.stackguard_test)? 1 : 0;
+}
+
+
 /*************************************************
 *      Convert UTF-8 character to code point     *
 *************************************************/
@ -2031,16 +2070,16 @@ return i + 1;

 #ifdef SUPPORT_PCRE16
 /*************************************************
-*         Convert a string to 16-bit             *
+*          Convert pattern to 16-bit             *
 *************************************************/

-/* The input is always interpreted as a string of UTF-8 bytes. If all the input
-bytes are ASCII, the space needed for a 16-bit string is exactly double the
-8-bit size. Otherwise, the size needed for a 16-bit string is no more than
-double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
-in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
-result is always left in pbuffer16. Impose a minimum size to save repeated
-re-sizing.
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
+all the input bytes are ASCII, the space needed for a 16-bit string is exactly
+double the 8-bit size. Otherwise, the size needed for a 16-bit string is no
+more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
+possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
+UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
+repeated re-sizing.

 Note that this function does not object to surrogate values. This is
 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
@ -2074,9 +2113,13 @@ if (pbuffer16_size < 2*len + 2)
    exit(1);
    }
  }
-pp = pbuffer16;

-while (len > 0)
+pp = pbuffer16;
+if (!utf)
+  {
+  while (len-- > 0) *pp++ = *p++;
+  }  
+else while (len > 0)
  {
  uint32_t c;
  int chlen = utf82ord(p, &c);
@ -2102,15 +2145,15 @@ return pp - pbuffer16;

 #ifdef SUPPORT_PCRE32
 /*************************************************
-*         Convert a string to 32-bit             *
+*          Convert pattern to 32-bit             *
 *************************************************/

-/* The input is always interpreted as a string of UTF-8 bytes. If all the input
-bytes are ASCII, the space needed for a 32-bit string is exactly four times the
-8-bit size. Otherwise, the size needed for a 32-bit string is no more than four
-times, because the number of characters must be less than the number of bytes.
-The result is always left in pbuffer32. Impose a minimum size to save repeated
-re-sizing.
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
+all the input bytes are ASCII, the space needed for a 32-bit string is exactly
+four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no
+more than four times, because the number of characters must be less than the
+number of bytes. The result is always left in pbuffer32. Impose a minimum size
+to save repeated re-sizing.

 Note that this function does not object to surrogate values. This is
 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
@ -2143,9 +2186,13 @@ if (pbuffer32_size < 4*len + 4)
    exit(1);
    }
  }
-pp = pbuffer32;

-while (len > 0)
+pp = pbuffer32;
+if (!utf)
+  {
+  while (len-- > 0) *pp++ = *p++;
+  }  
+else while (len > 0)
  {
  uint32_t c;
  int chlen = utf82ord(p, &c);
@ -3021,8 +3068,25 @@ if ((pat_patctl.control & CTL_INFO) != 0)
  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
  
+  /* Remove UTF/UCP if they were there only because of forbid_utf. This saves 
+  cluttering up the verification output of non-UTF test files. */
+  
+  if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
+    {
+    compile_options &= ~PCRE2_NEVER_UTF; 
+    overall_options &= ~PCRE2_NEVER_UTF; 
+    }  
+ 
+  if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
+    {
+    compile_options &= ~PCRE2_NEVER_UCP; 
+    overall_options &= ~PCRE2_NEVER_UCP; 
+    }  
+
  if ((compile_options|overall_options) == 0)
    fprintf(outfile, "No options\n");
+  else if (compile_options == overall_options)
+    show_compile_options(compile_options, "Options:", "\n");    
  else
    {
    show_compile_options(compile_options, "Compile options:", "\n");
@ -3035,26 +3099,26 @@ if ((pat_patctl.control & CTL_INFO) != 0)
    fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
      "any Unicode newline" : "CR, LF, or CRLF");

-  switch (newline_convention)
+  if (newline_convention != NEWLINE_DEFAULT) switch (newline_convention)
    {
    case PCRE2_NEWLINE_CR:
-    fprintf(outfile, "Newline is CR\n");
+    fprintf(outfile, "Forced newline is CR\n");
    break;

    case PCRE2_NEWLINE_LF:
-    fprintf(outfile, "Newline is LF\n");
+    fprintf(outfile, "Forced newline is LF\n");
    break;

    case PCRE2_NEWLINE_CRLF:
-    fprintf(outfile, "Newline is CRLF\n");
+    fprintf(outfile, "Forced newline is CRLF\n");
    break;

    case PCRE2_NEWLINE_ANYCRLF:
-    fprintf(outfile, "Newline is CR, LF, or CRLF\n");
+    fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
    break;

    case PCRE2_NEWLINE_ANY:
-    fprintf(outfile, "Newline is any Unicode newline\n");
+    fprintf(outfile, "Forced newline is any Unicode newline\n");
    break;

    default:
@ -3063,7 +3127,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)

  if (first_ctype == 2)
    {
-    fprintf(outfile, "First char at start or follows newline\n");
+    fprintf(outfile, "First code unit at start or follows newline\n");
    }
  else if (first_ctype == 1)
    {
@ -3079,35 +3143,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
      fprintf(outfile, "%s\n", caseless);
      }
    }
-  else
-    {
-    fprintf(outfile, "No first code unit\n");
-    }
-
-  if (last_ctype == 0)
-    {
-    fprintf(outfile, "No last code unit\n");
-    }
-  else
-    {
-    const char *caseless =
-      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
-      "" : " (caseless)";
-    if (PRINTOK(last_cunit))
-      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
-    else
-      {
-      fprintf(outfile, "Last code unit = ");
-      pchar(last_cunit, FALSE, outfile);
-      fprintf(outfile, "%s\n", caseless);
-      }
-    }
-
-  fprintf(outfile, "Subject length lower bound = %d\n", minlength);
-
-  if (start_bits == NULL)
-    fprintf(outfile, "No starting code unit list\n");
-  else
+  else if (start_bits != NULL)
    {
    int i;
    int c = 24;
@ -3135,6 +3171,31 @@ if ((pat_patctl.control & CTL_INFO) != 0)
      }
    fprintf(outfile, "\n");
    }
+  else
+    {
+    fprintf(outfile, "No first code unit\n");
+    }
+
+  if (last_ctype == 0)
+    {
+    fprintf(outfile, "No last code unit\n");
+    }
+  else
+    {
+    const char *caseless =
+      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
+      "" : " (caseless)";
+    if (PRINTOK(last_cunit))
+      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
+    else
+      {
+      fprintf(outfile, "Last code unit = ");
+      pchar(last_cunit, FALSE, outfile);
+      fprintf(outfile, "%s\n", caseless);
+      }
+    }
+
+  fprintf(outfile, "Subject length lower bound = %d\n", minlength);

 /* FIXME: tidy this up */

@ -3183,7 +3244,11 @@ if (restrict_for_perl_test)
  return PR_ABEND;
  }

-if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
+if (strncmp((char *)buffer, "#forbid_utf", 11) == 0 && isspace(buffer[11]))
+  {
+  forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; 
+  } 
+else if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
  {
  (void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL);
  }
@ -3491,6 +3556,13 @@ else switch (pat_patctl.tables_id)

 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);

+/* Set up for the stackguard test. */
+
+if (pat_patctl.stackguard_test != 0) 
+  {
+  PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard);
+  } 
+
 /* Handle compiling via the POSIX interface, which doesn't support the
 timing, showing, or debugging options, nor the ability to pass over
 local character tables. Neither does it have 16-bit or 32-bit support. */
@ -3604,7 +3676,7 @@ if (timeit > 0)
  for (i = 0; i < timeit; i++)
    {
    PCRE2_COMPILE(compiled_code, pbuffer, patlen,
-      pat_patctl.options, &errorcode, &erroroffset, pat_context);
+      pat_patctl.options|forbid_utf, &errorcode, &erroroffset, pat_context);
    if (TEST(compiled_code, !=, NULL))
      { SUB1(pcre2_code_free, compiled_code); }
    }
@ -3618,8 +3690,8 @@ if (timeit > 0)

 /* A final compile that is used "for real". */

-PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options, &errorcode,
-  &erroroffset, pat_context);
+PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf, 
+  &errorcode, &erroroffset, pat_context);

 /* Compilation failed; go back for another re, skipping to blank line
 if non-interactive. */
@ -3782,14 +3854,12 @@ for (;;)
    min = mid;
    mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
    }
-
  else if (capcount >= 0 ||
           capcount == PCRE2_ERROR_NOMATCH ||
           capcount == PCRE2_ERROR_PARTIAL)
    {
    if (mid == min + 1)
      {
-      if (capcount != PCRE2_ERROR_NOMATCH)
      fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
      break;
      }
@ -4184,9 +4254,12 @@ while ((c = *p++) != 0)
    continue;

    default:
+    if (isalnum(c))
+      { 
      fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
      return PR_OK;
      } 
+    }

  /* We now have a character value in c that may be greater than 255.
  In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
@ -4608,7 +4681,12 @@ for (gmatched = 0;; gmatched++)
      PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer,
        sizeof(copybuffer)/code_unit_size);
      if (rc < 0)
-        fprintf(outfile, "copy substring %d failed %d\n", n, rc);
+        { 
+        fprintf(outfile, "copy substring %d failed (%d): ", n, rc);
+        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
+        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
+        fprintf(outfile, "\n");
+        } 
      else
        {
        fprintf(outfile, "%2dC ", n);
@ -4641,7 +4719,10 @@ for (gmatched = 0;; gmatched++)
        copybuffer, sizeof(copybuffer)/code_unit_size);
      if (rc < 0)
        {
-        fprintf(outfile, "copy substring '%s' failed %d\n", nptr, rc);
+        fprintf(outfile, "copy substring '%s' failed (%d): ", nptr, rc);
+        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
+        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
+        fprintf(outfile, "\n");
        }
      else
        {
@ -4661,7 +4742,12 @@ for (gmatched = 0;; gmatched++)
      uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
      PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer);
      if (rc < 0)
-        fprintf(outfile, "get substring %d failed %d\n", n, rc);
+        { 
+        fprintf(outfile, "get substring %d failed (%d): ", n, rc);
+        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
+        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
+        fprintf(outfile, "\n");
+        } 
      else
        {
        fprintf(outfile, "%2dG ", n);
@ -4694,7 +4780,10 @@ for (gmatched = 0;; gmatched++)
      PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer);
      if (rc < 0)
        {
-        fprintf(outfile, "get substring '%s' failed %d\n", nptr, rc);
+        fprintf(outfile, "get substring '%s' failed (%d): ", nptr, rc);
+        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
+        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
+        fprintf(outfile, "\n");
        }
      else
        {
@ -4715,7 +4804,12 @@ for (gmatched = 0;; gmatched++)
      size_t *lengths;
      PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
      if (rc < 0)
-        fprintf(outfile, "get substring list failed %d\n", rc);
+        { 
+        fprintf(outfile, "get substring list failed (%d): ", rc);
+        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
+        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
+        fprintf(outfile, "\n");
+        } 
      else
        {
        for (i = 0; i < capcount; i++)
@ -4737,7 +4831,6 @@ for (gmatched = 0;; gmatched++)
  else if (capcount == PCRE2_ERROR_PARTIAL)
    {
    PCRE2_OFFSET leftchar = FLD(match_data, leftchar);
-
    fprintf(outfile, "Partial match");
    if (leftchar != FLD(match_data, startchar))
      fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar));
@ -4880,8 +4973,8 @@ for (gmatched = 0;; gmatched++)
    else
      {
      pp += end_offset * code_unit_size;
-      len -= end_offset;
-      ulen -= end_offset *code_unit_size;
+      len -= end_offset * code_unit_size;
+      ulen -= end_offset;
      }
    }
  }  /* End of global loop */
@ -4894,7 +4987,7 @@ return PR_OK;


 /*************************************************
-*                Print PCRE version              *
+*               Print PCRE2 version              *
 *************************************************/

 /* The version string was read into 'version' at the start of execution. */
@ -4903,7 +4996,7 @@ static void
 print_version(FILE *f)
 {
 VERSION_TYPE *vp;
-fprintf(f, "PCRE version ");
+fprintf(f, "PCRE2 version ");
 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
 fprintf(f, "\n");
 }
@ -4976,6 +5069,7 @@ printf("  -d            set default pattern control 'debug'\n");
 printf("  -dfa          set default subject control 'dfa'\n");
 printf("  -help         show usage information\n");
 printf("  -i            set default pattern control 'info'\n");
+printf("  -jit          set default pattern control 'jit'\n");
 printf("  -q            quiet: do not output PCRE version number at start\n");
 printf("  -pattern <s>  set default pattern control fields\n");
 printf("  -subject <s>  set default subject control fields\n");
@ -5261,10 +5355,18 @@ while (argc > 1 && argv[op][0] == '-')

  /* Set some common pattern and subject controls */

+  else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; 
  else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
  else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
  else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
-  else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; 
+  else if (strcmp(arg, "-jit") == 0)
+    {
+    def_patctl.jit = 7;  /* full & partial */ 
+#ifndef SUPPORT_JIT
+    fprintf(stderr, "** Warning: JIT support is not available: "
+                    "-jit calls dummy functions.\n");
+#endif     
+    } 

  /* Set timing parameters */

@ -5503,7 +5605,8 @@ while (notdone)
    while (isspace(*p)) p++; 
    if (*p != 0)
      {
-      fprintf(stderr, "** Invalid pattern delimiter '%c'.\n", *buffer);
+      fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer, 
+        *buffer);
      rc = PR_SKIP;
      }
    }
--- a/testdata/testinput1
+++ b/testdata/testinput1
--- a/testdata/testinput2
+++ b/testdata/testinput2
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
--- a/testdata/testoutput2
+++ b/testdata/testoutput2