Tests 1 and 2 are converted (but without save/restore).

2014-07-24 16:32:38 +00:00 · 2014-07-24 16:32:38 +00:00 · 017b6a1624
parent 1701838220
commit 017b6a1624
12 changed files with 35118 additions and 217 deletions
--- a/995
+++ b/995
@ -0,0 +1,995 @@
 #! /bin/sh
 ###############################################################################
 # Run the PCRE2 tests using the pcre2test program. The appropriate tests are
 # selected, depending on which build-time options were used.
 #
 # When JIT support is available, all appropriate tests are run with and without
 # JIT, unless "nojit" is given on the command line. There are also two tests
 # for JIT-specific features, one to be run when JIT support is available
 # (unless "nojit" is specified), and one when it is not.
 #
 # Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
 # possible to select which to test by giving "-8", "-16" or "-32" on the
 # command line.
 #
 # As well as "nojit", "-8", "-16", and "-32", arguments for this script are
 # individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
 # end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
 # runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
 # except test 10. Whatever order the arguments are in, the tests are always run
 # in numerical order.
 #
 # The special argument "3S" runs test 3, stopping if it fails. Test 3 is the
 # locale test, and failure usually means there's an issue with the locale
 # rather than a bug in PCRE2, so normally subsequent tests are run. "3S" is
 # useful when you want to debug or update the test.
 #
 # Inappropriate tests are automatically skipped (with a comment to say so): for
 # example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
 # support is compiled, test 13 is skipped.
 #
 # Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
 # followed by an argument to run cross-compiled executables under a simulator,
 # for example:
 #
 # RunTest 3 sim "qemu-arm -s 8388608"
 #
 # There are two special cases where only one argument is allowed:
 #
 # If the first and only argument is "ebcdic", the script runs the special
 # EBCDIC test that can be useful for checking certain EBCDIC features, even
 # when run in an ASCII environment.
 #
 # If the script is obeyed as "RunTest list", a list of available tests is
 # output, but none of them are run.
 ###############################################################################
 # Define test titles in variables so that they can be output as a list. Some
 # of them are modified (e.g. with -8 or -16) when used in the actual tests.
 #
 # Only the titles for tests 1 and 2 are currently assigned; the assignments
 # for tests 3 to 26 are commented out, as is the code that runs those tests.
 title1="Test 1: Main functionality (Compatible with Perl >= 5.10)"
 title2="Test 2: API, errors, internals, and non-Perl stuff"
 #title3="Test 3: Locale-specific features"
 #title4A="Test 4: UTF"
 #title4B=" support (Compatible with Perl >= 5.10)"
 #title5="Test 5: API, internals, and non-Perl stuff for UTF"
 #title6="Test 6: Unicode property support (Compatible with Perl >= 5.10)"
 #title7="Test 7: API, internals, and non-Perl stuff for Unicode property support"
 #title8="Test 8: DFA matching main functionality"
 #title9="Test 9: DFA matching with UTF"
 #title10="Test 10: DFA matching with Unicode properties"
 #title11="Test 11: Internal offsets and code size tests"
 #title12="Test 12: JIT-specific features (when JIT is available)"
 #title13="Test 13: JIT-specific features (when JIT is not available)"
 #title14="Test 14: Specials for the basic 8-bit library"
 #title15="Test 15: Specials for the 8-bit library with UTF-8 support"
 #title16="Test 16: Specials for the 8-bit library with Unicode property support"
 #title17="Test 17: Specials for the basic 16/32-bit library"
 #title18="Test 18: Specials for the 16/32-bit library with UTF-16/32 support"
 #title19="Test 19: Specials for the 16/32-bit library with Unicode property support"
 #title20="Test 20: DFA specials for the basic 16/32-bit library"
 #title21="Test 21: Reloads for the basic 16/32-bit library"
 #title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support"
 #title23="Test 23: Specials for the 16-bit library"
 #title24="Test 24: Specials for the 16-bit library with UTF-16 support"
 #title25="Test 25: Specials for the 32-bit library"
 #title26="Test 26: Specials for the 32-bit library with UTF-32 support"
 # Highest test number accepted in a range argument such as "1-2"; it is also
 # the upper end used for an open-ended range such as "1-".
 maxtest=2
 # "RunTest list": print the titles of the available tests, one per line, and
 # stop without running anything. This applies only when "list" is the sole
 # argument.
 case "$#:$1" in
   1:list)
     printf '%s\n' "$title1"
     printf '%s\n' "$title2 (not UTF)"
 #  echo $title3
 #  echo $title4A $title4B
 #  echo $title5 support
 #  echo $title6
 #  echo $title7
 #  echo $title8
 #  echo $title9
 #  echo $title10
 #  echo $title11
 #  echo $title12
 #  echo $title13
 #  echo $title14
 #  echo $title15
 #  echo $title16
 #  echo $title17
 #  echo $title18
 #  echo $title19
 #  echo $title20
 #  echo $title21
 #  echo $title22
 #  echo $title23
 #  echo $title24
 #  echo $title25
 #  echo $title26
     exit 0
   ;;
 esac
 # Choose the comparison command: plain "diff", upgraded to "diff -u" when the
 # local diff accepts the -u option (some systems have a diff that lacks it).
 cf="diff"
 if diff -u /dev/null /dev/null 2>/dev/null; then
   cf="diff -u"
 fi
 # Locate the test data: under $srcdir when that names a directory, otherwise
 # in ./testdata or ../testdata, whichever is found first.
 testdata=
 if [ -n "$srcdir" ] && [ -d "$srcdir" ] ; then
   testdata="$srcdir/testdata"
 else
   for candidate in ./testdata ../testdata; do
     if [ -d "$candidate" ] ; then
       testdata=$candidate
       break
     fi
   done
 fi
 if [ -z "$testdata" ] ; then
   echo "Cannot find the testdata directory"
   exit 1
 fi
 # ------ Special EBCDIC Test -------
 # "RunTest ebcdic": run only the EBCDIC test file, first with the normal
 # matcher and then with -dfa, and exit. The test is refused unless
 # "pcre2test -C ebcdic" exits with status 1. Any failure ends the script with
 # status 1.
 if [ $# -eq 1 ] && [ "$1" = "ebcdic" ]; then
   ./pcre2test -C ebcdic >/dev/null
   ebcdic=$?
   case $ebcdic in
     1) ;;
     *) echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
        exit 1;;
   esac
   for opt in "" "-dfa"; do
     ./pcre2test -q $opt $testdata/testinputEBC >testtry || exit 1
     $cf $testdata/testoutputEBC testtry || exit 1
     case "$opt" in
       -dfa) echo "  OK using DFA";;
       *)    echo "  OK";;
     esac
   done
   exit 0
 fi
 # ------ Normal Tests ------
 # Default values. All of these start out empty and are filled in by the
 # argument-processing loop that follows:
 #   arg8, arg16, arg32  become "yes" when -8, -16 or -32 is given
 #   nojit               becomes "yes" when "nojit" is given
 #   sim                 the word following "sim"; it is put in front of each
 #                       pcre2test command in the normal tests
 #   skip                accumulates the test numbers given as ~N
 #   valgrind            a valgrind command line when "valgrind" or
 #                       "valgrind-log" is given
 arg8=
 arg16=
 arg32=
 nojit=
 sim=
 skip=
 valgrind=
 # This is in case the caller has set aliases (as I do - PH)
 # NOTE(review): "unset" removes shell variables and functions; aliases are
 # normally removed with "unalias" - confirm that this line does what is meant.
 unset cp ls mv rm
 # Process options and select which tests to run; for those that are explicitly
 # requested, check that the necessary optional facilities are available.
 #
 # Each doN variable records whether test N has been selected. Only do1 and do2
 # are active; the assignments for tests 3 to 26 are commented out.
 do1=no
 do2=no
 #do3=no
 #do4=no
 #do5=no
 #do6=no
 #do7=no
 #do8=no
 #do9=no
 #do10=no
 #do11=no
 #do12=no
 #do13=no
 #do14=no
 #do15=no
 #do16=no
 #do17=no
 #do18=no
 #do19=no
 #do20=no
 #do21=no
 #do22=no
 #do23=no
 #do24=no
 #do25=no
 #do26=no
 # Process the command-line arguments one at a time:
 #   N            selects test N (sets doN=yes)
 #   A-B, A-      selects tests A to B, or A to $maxtest
 #   ~N           adds N to $skip, to be excluded later
 #   -8 -16 -32   restrict testing to the named library widths
 #   nojit        suppresses the additional JIT runs
 #   sim CMD      runs pcre2test under CMD
 #   valgrind, valgrind-log
 #                run pcre2test under valgrind
 # Anything else is reported as an error and the script exits with status 1.
 while [ $# -gt 0 ] ; do
  case $1 in
    1) do1=yes;;
    2) do2=yes;;
 #    3) do3=yes;;
 #    4) do4=yes;;
 #    5) do5=yes;;
 #    6) do6=yes;;
 #    7) do7=yes;;
 #    8) do8=yes;;
 #    9) do9=yes;;
 #   10) do10=yes;;
 #   11) do11=yes;;
 #   12) do12=yes;;
 #   13) do13=yes;;
 #   14) do14=yes;;
 #   15) do15=yes;;
 #   16) do16=yes;;
 #   17) do17=yes;;
 #   18) do18=yes;;
 #   19) do19=yes;;
 #   20) do20=yes;;
 #   21) do21=yes;;
 #   22) do22=yes;;
 #   23) do23=yes;;
 #   24) do24=yes;;
 #   25) do25=yes;;
 #   26) do26=yes;;
   -8) arg8=yes;;
  -16) arg16=yes;;
  -32) arg32=yes;;
   nojit) nojit=yes;;
   sim)
     # "sim" must be followed by the simulator command. Without this check a
     # trailing "sim" leaves nothing for the shift at the end of the loop,
     # which is an error that makes some shells exit with no explanation.
     if [ $# -lt 2 ] ; then
       echo "Missing simulator command after 'sim'"; exit 1
     fi
     shift; sim=$1
   ;;
   valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
   valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
   ~*)
     if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
       # Capture the digits with a single, non-repeated group so that the
       # extracted value is always the whole number.
       skip="$skip `expr "$1" : '~\([0-9][0-9]*\)$'`"
     else
       echo "Unknown option or test selector '$1'"; exit 1
     fi
   ;;
   *-*)
     if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
       tf=`expr "$1" : '\([0-9]*\)'`
       tt=`expr "$1" : '.*-\([0-9]*\)'`
       if [ "$tt" = "" ] ; then tt=$maxtest; fi
       # Reject ranges that start below 1, end above $maxtest, or are empty
       # (start greater than end). An empty range would select nothing and
       # so would silently fall back to running every test.
       if expr \( "$tf" "<" 1 \) \| \( "$tt" ">" "$maxtest" \) \| \( "$tf" ">" "$tt" \) >/dev/null; then
         echo "Invalid test range '$1'"; exit 1
       fi
       while expr "$tf" "<=" "$tt" >/dev/null; do
         eval do${tf}=yes
         tf=`expr $tf + 1`
       done
     else
       echo "Invalid test range '$1'"; exit 1
     fi
   ;;
   *) echo "Unknown option or test selector '$1'"; exit 1;;
  esac
  shift
 done
 # Query pcre2test for the build-time configuration. Each "-C <item>" call
 # reports its answer through the exit status, which is captured immediately.
 $sim ./pcre2test -C linksize >/dev/null
 link_size=$?
 # Only link sizes from 2 to 4 are accepted.
 if [ $link_size -lt 2 ] || [ $link_size -gt 4 ] ; then
   echo "Failed to find internal link size"
   exit 1
 fi
 # Any combination of the 8-bit, 16-bit and 32-bit libraries may have been
 # built, but at least one is needed. A non-zero status means that the library
 # is present.
 $sim ./pcre2test -C pcre8 >/dev/null
 support8=$?
 $sim ./pcre2test -C pcre16 >/dev/null
 support16=$?
 $sim ./pcre2test -C pcre32 >/dev/null
 support32=$?
 # Start with every width skipped. An empty $test8 selects the 8-bit library;
 # $test16 and $test32 hold the pcre2test option for their width.
 test8=skip
 test16=skip
 test32=skip
 if [ -z "$arg8$arg16$arg32" ] ; then
   # No width was requested: test every library that exists.
   if [ $support8 -ne 0 ] ; then test8= ; fi
   if [ $support16 -ne 0 ] ; then test16=-16 ; fi
   if [ $support32 -ne 0 ] ; then test32=-32 ; fi
 else
   # Specific widths were requested: each one must exist, otherwise give up.
   case "$arg8:$support8" in
     yes:0) echo "Cannot run 8-bit library tests: 8-bit library not compiled"
            exit 1;;
     yes:*) test8= ;;
   esac
   case "$arg16:$support16" in
     yes:0) echo "Cannot run 16-bit library tests: 16-bit library not compiled"
            exit 1;;
     yes:*) test16=-16 ;;
   esac
   case "$arg32:$support32" in
     yes:0) echo "Cannot run 32-bit library tests: 32-bit library not compiled"
            exit 1;;
     yes:*) test32=-32 ;;
   esac
 fi
 # UTF support is all-or-nothing across the bit sizes that are built (for
 # example, UTF-8 support cannot be present without UTF-16 support), so a
 # single query is enough.
 $sim ./pcre2test -C utf >/dev/null
 utf=$?
 # The extra JIT runs are enabled only when JIT is compiled in and "nojit" was
 # not given on the command line.
 $sim ./pcre2test -C jit >/dev/null
 jit=$?
 jitopt=
 if [ $jit -ne 0 ] && [ "$nojit" != "yes" ] ; then
   jitopt=-jit
 fi
 # With no explicit test selection, turn on every test; tests that do not
 # apply to this build are skipped when their turn comes.
 if [ "$do1$do2" = "nono" ]; then
 #     -a $do3  = no -a $do4  = no -a \
 #     $do5  = no -a $do6  = no -a $do7  = no -a $do8  = no -a \
 #     $do9  = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
 #     $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
 #     $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
 #     $do21 = no -a $do22 = no -a $do23 = no -a $do24 = no -a \
 #     $do25 = no -a $do26 = no
   do1=yes
   do2=yes
 #  do3=yes
 #  do4=yes
 #  do5=yes
 #  do6=yes
 #  do7=yes
 #  do8=yes
 #  do9=yes
 #  do10=yes
 #  do11=yes
 #  do12=yes
 #  do13=yes
 #  do14=yes
 #  do15=yes
 #  do16=yes
 #  do17=yes
 #  do18=yes
 #  do19=yes
 #  do20=yes
 #  do21=yes
 #  do22=yes
 #  do23=yes
 #  do24=yes
 #  do25=yes
 #  do26=yes
 fi
 # Apply the explicit exclusions last, so that an argument list made up solely
 # of "~N" items means "everything except those tests".
 for excluded in $skip; do
   eval do$excluded=no
 done
 # Report which test data is in use, and run pcre2test on an empty input so
 # that it shows which release is being tested.
 echo
 echo PCRE2 C library tests using test data from $testdata
 $sim ./pcre2test /dev/null
 echo
 for bmode in "$test8" "$test16" "$test32"; do
  # Turn the per-width setting into $bits and print a heading. A width left
  # at "skip" is passed over; any value other than -16 or -32 means 8-bit.
  # The 16-bit and 32-bit headings are preceded by a blank line unless both
  # of the other two widths are skipped.
  case "$bmode" in
    skip)
      continue
      ;;
    -16)
      if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
      bits=16
      ;;
    -32)
      if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
      bits=32
      ;;
    *)
      bits=8
      ;;
  esac
  echo "---- Testing ${bits}-bit library ----"
  echo ""
 # Test 1: the main Perl-compatible functionality. It is run once without JIT
 # and, when $jitopt is set, once more with it. A pcre2test failure or any
 # difference from the expected output ends the script with status 1.
 if [ "$do1" = yes ] ; then
   echo $title1
   for opt in "" $jitopt; do
     $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry || exit 1
     $cf $testdata/testoutput1 testtry || exit 1
     case "$opt" in
       -jit) echo "  OK with JIT";;
       *)    echo "  OK";;
     esac
   done
 fi
 # Test 2: PCRE2-only behaviour (API, errors, internals) that is not
 # Perl-compatible. Like test 1, it is run without JIT and then, when $jitopt
 # is set, with it.
 if [ "$do2" = yes ] ; then
   echo $title2 "(not UTF-$bits)"
   for opt in "" $jitopt; do
     if $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry; then
       $cf $testdata/testoutput2 testtry || exit 1
     else
       # pcre2test itself failed: point the user at the stack-size explanation.
       echo " "
       echo "** Test 2 requires a lot of stack. If it has crashed with a"
       echo "** segmentation fault, it may be that you do not have enough"
       echo "** stack available by default. Please see the 'pcre2stack' man"
       echo "** page for a discussion of PCRE2's stack usage."
       echo " "
       exit 1
     fi
     case "$opt" in
       -jit) echo "  OK with JIT";;
       *)    echo "  OK";;
     esac
   done
 fi
 ## Locale-specific tests, provided that either the "fr_FR" or the "french"
 ## locale is available. The former is the Unix-like standard; the latter is
 ## for Windows. Another possibility is "fr". Unfortunately, different versions
 ## of the French locale give different outputs for some items. This test passes
 ## if the output matches any one of the alternative output files.
 #
 #if [ $do3 = yes ] ; then
 #  locale -a | grep '^fr_FR$' >/dev/null
 #  if [ $? -eq 0 ] ; then
 #    locale=fr_FR
 #    infile=$testdata/testinput3
 #    outfile=$testdata/testoutput3
 #    outfile2=$testdata/testoutput3A
 #    outfile3=$testdata/testoutput3B
 #  else
 #    infile=test3input
 #    outfile=test3output
 #    outfile2=test3outputA
 #    outfile3=test3outputB
 #    locale -a | grep '^french$' >/dev/null
 #    if [ $? -eq 0 ] ; then
 #      locale=french
 #      sed 's/fr_FR/french/' $testdata/testinput3 >test3input
 #      sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
 #      sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA
 #      sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB
 #    else
 #      locale -a | grep '^fr$' >/dev/null
 #      if [ $? -eq 0 ] ; then
 #        locale=fr
 #        sed 's/fr_FR/fr/' $testdata/testinput3 >test3input
 #        sed 's/fr_FR/fr/' $testdata/testoutput3 >test3output
 #        sed 's/fr_FR/fr/' $testdata/testoutput3A >test3outputA
 #        sed 's/fr_FR/fr/' $testdata/testoutput3B >test3outputB
 #      else
 #        locale=
 #      fi
 #    fi
 #  fi
 #
 #  if [ "$locale" != "" ] ; then
 #    echo $title3 "(using '$locale' locale)"
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
 #      if [ $? = 0 ] ; then
 #        if $cf $outfile testtry >teststdout || \
 #           $cf $outfile2 testtry >teststdout || \
 #           $cf $outfile3 testtry >teststdout
 #        then
 #          if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #          elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #          else echo "  OK"
 #          fi
 #        else
 #          echo "** Locale test did not run successfully. The output did not match"
 #          echo "   $outfile, $outfile2 or $outfile3."
 #          echo "   This may mean that there is a problem with the locale settings rather"
 #          echo "   than a bug in PCRE."
 #          exit 1
 #        fi
 #      else exit 1
 #      fi
 #    done
 #  else
 #    echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr' or"
 #    echo "'french' locales exist, or the \"locale\" command is not available"
 #    echo "to check for them."
 #    echo " "
 #  fi
 #fi
 #
 ## Additional tests for UTF support
 #
 #if [ $do4 = yes ] ; then
 #  echo ${title4A}-${bits}${title4B}
 #  if [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput4 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 #if [ $do5 = yes ] ; then
 #  echo ${title5}-${bits} support
 #  if [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput5 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 #if [ $do6 = yes ] ; then
 #  echo $title6
 #  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
 #    echo "  Skipped because Unicode property support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput6 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput6 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Test non-Perl-compatible Unicode property support
 #
 #if [ $do7 = yes ] ; then
 #  echo $title7
 #  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
 #    echo "  Skipped because Unicode property support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput7 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for DFA matching support
 #
 #if [ $do8 = yes ] ; then
 #  echo $title8
 #  for opt in "" "-s"; do
 #    $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput8 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput8 testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
 #  done
 #fi
 #
 #if [ $do9 = yes ] ; then
 #  echo ${title9}-${bits}
 #  if [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  else
 #    for opt in "" "-s"; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput9 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput9 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
 #    done
 #  fi
 #fi
 #
 #if [ $do10 = yes ] ; then
 #  echo $title10
 #  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
 #    echo "  Skipped because Unicode property support is not available"
 #  else
 #    for opt in "" "-s"; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput10 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput10 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
 #    done
 #  fi
 #fi
 #
 ## Test of internal offsets and code sizes. This test is run only when there
 ## is Unicode property support and the link size is 2. The actual tests are
 ## mostly the same as in some of the above, but in this test we inspect some
 ## offsets and sizes that require a known link size. This is a doublecheck for
 ## the maintainer, just in case something changes unexpectedly. The output from
 ## this test is not the same in 8-bit and 16-bit modes.
 #
 #if [ $do11 = yes ] ; then
 #  echo $title11
 #  if [ $link_size -ne 2 ] ; then
 #    echo "  Skipped because link size is not 2"
 #  elif [ $ucp -eq 0 ] ; then
 #    echo "  Skipped because Unicode property support is not available"
 #  else
 #    for opt in "" "-s"; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput11-$bits testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
 #    done
 #  fi
 #fi
 #
 ## Test JIT-specific features when JIT is available
 #
 #if [ $do12 = yes ] ; then
 #  echo $title12
 #  if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
 #    echo "  Skipped because JIT is not available or not usable"
 #  else
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput12 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput12 testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 #
 ## Test JIT-specific features when JIT is not available
 #
 #if [ $do13 = yes ] ; then
 #  echo $title13
 #  if [ $jit -ne 0 ] ; then
 #    echo "  Skipped because JIT is available"
 #  else
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput13 testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 #
 ## Tests for 8-bit-specific features
 #
 #if [ "$do14" = yes ] ; then
 #  echo $title14
 #  if [ "$bits" = "16" -o "$bits" = "32" ] ; then
 #    echo "  Skipped when running 16/32-bit tests"
 #  else
 #    cp -f $testdata/saved16 testsaved16
 #    cp -f $testdata/saved32 testsaved32
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput14 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput14 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for 8-bit-specific features (needs UTF-8 support)
 #
 #if [ "$do15" = yes ] ; then
 #  echo $title15
 #  if [ "$bits" = "16" -o "$bits" = "32" ] ; then
 #    echo "  Skipped when running 16/32-bit tests"
 #  elif [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput15 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput15 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for 8-bit-specific features (Unicode property support)
 #
 #if [ $do16 = yes ] ; then
 #  echo $title16
 #  if [ "$bits" = "16" -o "$bits" = "32" ] ; then
 #    echo "  Skipped when running 16/32-bit tests"
 #  elif [ $ucp -eq 0 ] ; then
 #    echo "  Skipped because Unicode property support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput16 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput16 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for 16/32-bit-specific features
 #
 #if [ $do17 = yes ] ; then
 #  echo $title17
 #  if [ "$bits" = "8" ] ; then
 #    echo "  Skipped when running 8-bit tests"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput17 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput17 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for 16/32-bit-specific features (UTF-16/32 support)
 #
 #if [ $do18 = yes ] ; then
 #  echo $title18
 #  if [ "$bits" = "8" ] ; then
 #    echo "  Skipped when running 8-bit tests"
 #  elif [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput18 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput18-$bits testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for 16/32-bit-specific features (Unicode property support)
 #
 #if [ $do19 = yes ] ; then
 #  echo $title19
 #  if [ "$bits" = "8" ] ; then
 #    echo "  Skipped when running 8-bit tests"
 #  elif [ $ucp -eq 0 ] ; then
 #    echo "  Skipped because Unicode property support is not available"
 #  else
 #    for opt in "" "-s" $jitopt; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput19 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput19 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for 16/32-bit-specific features in DFA non-UTF-16/32 mode
 #
 #if [ $do20 = yes ] ; then
 #  echo $title20
 #  if [ "$bits" = "8" ] ; then
 #    echo "  Skipped when running 8-bit tests"
 #  else
 #    for opt in "" "-s"; do
 #      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput20 testtry
 #      if [ $? = 0 ] ; then
 #        $cf $testdata/testoutput20 testtry
 #        if [ $? != 0 ] ; then exit 1; fi
 #      else exit 1
 #      fi
 #      if [ "$opt" = "-s" ] ; then echo "  OK with study"
 #      else echo "  OK"
 #      fi
 #    done
 #  fi
 #fi
 #
 ## Tests for reloads with 16/32-bit library
 #
 #if [ $do21 = yes ] ; then
 #  echo $title21
 #  if [ "$bits" = "8" ] ; then
 #    echo "  Skipped when running 8-bit tests"
 #  elif [ $link_size -ne 2 ] ; then
 #    echo "  Skipped because link size is not 2"
 #  else
 #    cp -f $testdata/saved8 testsaved8
 #    cp -f $testdata/saved16LE-1 testsaved16LE-1
 #    cp -f $testdata/saved16BE-1 testsaved16BE-1
 #    cp -f $testdata/saved32LE-1 testsaved32LE-1
 #    cp -f $testdata/saved32BE-1 testsaved32BE-1
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput21 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput21-$bits testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 #
 ## Tests for reloads with 16/32-bit library (UTF-16 support)
 #
 #if [ $do22 = yes ] ; then
 #  echo $title22
 #  if [ "$bits" = "8" ] ; then
 #    echo "  Skipped when running 8-bit tests"
 #  elif [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  elif [ $link_size -ne 2 ] ; then
 #    echo "  Skipped because link size is not 2"
 #  else
 #    cp -f $testdata/saved16LE-2 testsaved16LE-2
 #    cp -f $testdata/saved16BE-2 testsaved16BE-2
 #    cp -f $testdata/saved32LE-2 testsaved32LE-2
 #    cp -f $testdata/saved32BE-2 testsaved32BE-2
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput22 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput22-$bits testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 #
 #if [ $do23 = yes ] ; then
 #  echo $title23
 #  if [ "$bits" = "8" -o "$bits" = "32" ] ; then
 #    echo "  Skipped when running 8/32-bit tests"
 #  else
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput23 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput23 testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 #
 #if [ $do24 = yes ] ; then
 #  echo $title24
 #  if [ "$bits" = "8" -o "$bits" = "32" ] ; then
 #    echo "  Skipped when running 8/32-bit tests"
 #  elif [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  else
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput24 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput24 testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 #
 #if [ $do25 = yes ] ; then
 #  echo $title25
 #  if [ "$bits" = "8" -o "$bits" = "16" ] ; then
 #    echo "  Skipped when running 8/16-bit tests"
 #  else
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput25 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput25 testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 #
 #if [ $do26 = yes ] ; then
 #  echo $title26
 #  if [ "$bits" = "8" -o "$bits" = "16" ] ; then
 #    echo "  Skipped when running 8/16-bit tests"
 #  elif [ $utf -eq 0 ] ; then
 #    echo "  Skipped because UTF-$bits support is not available"
 #  else
 #    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput26 testtry
 #    if [ $? = 0 ] ; then
 #      $cf $testdata/testoutput26 testtry
 #      if [ $? != 0 ] ; then exit 1; fi
 #    else exit 1
 #    fi
 #    echo "  OK"
 #  fi
 #fi
 # End of loop for 8/16/32-bit tests
 done
 # Clean up local working files
 # testtry is the output file that each pcre2test run above writes and that is
 # then compared with the expected output. The other names are not created by
 # the currently active tests - presumably they belong to the tests that are
 # still commented out (TODO confirm).
 rm -f test3input test3output test3outputA testNinput testsaved* teststderr teststdout testtry
 # End
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "05 July 2014" "PCRE 10.00"
+.TH PCRE2TEST 1 "22 July 2014" "PCRE 10.00"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -141,6 +141,10 @@ Output a brief summary these options and then exit.
 Behave as if each pattern has the \fB/info\fP modifier; information about the
 compiled pattern is given after compilation.
 .TP 10
 \fB-jit\fP
 Behave as if each pattern line has the \fBjit\fP modifier; after successful
 compilation, each pattern is passed to the just-in-time compiler, if available.
 .TP 10
 \fB-pattern\fP \fImodifier-list\fP
 Behave as if each pattern line contains the given modifiers.
 .TP 10
@ -216,6 +220,17 @@ In between sets of test data, a line that begins with a hash (#) character is
 interpreted as a command line. If the first character is followed by white
 space or an exclamation mark, the line is treated as a comment, and ignored.
 Otherwise, the following commands are recognized:
 .sp
  #forbid_utf
 .sp
 Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
 options set, which locks out the use of UTF and Unicode property features. This
 is a trigger guard that is used in test files to ensure that UTF/Unicode tests
 are not accidentally added to files that are used when UTF support is not
 included in the library. This effect can also be obtained by the use of
 \fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be unset, and
 the automatic options are not displayed in pattern information, to avoid
 cluttering up test output.
 .sp
  #load <file name>
 .sp
@ -358,10 +373,11 @@ the start of a modifier list. For example:
 .sp
  abc\=notbol,notempty
 .sp
-A backslash followed by anything else causes an error. However, if the very
+A backslash followed by any other non-alphanumeric character just escapes that
-last character in the line is a backslash (and there is no modifier list), it
+character. A backslash followed by anything else causes an error. However, if
-is ignored. This gives a way of passing an empty line as data, since a real
+the very last character in the line is a backslash (and there is no modifier
-empty line terminates the data input.
+list), it is ignored. This gives a way of passing an empty line as data, since
 a real empty line terminates the data input.
 .
 .
 .SH "PATTERN MODIFIERS"
@ -594,14 +610,17 @@ below. All other modifiers cause an error.
 .rs
 .sp
 The \fB/stackguard\fP modifier is used to test the use of
-\fBpcre2_stack_guard\fP. It must be followed by '0' or '1', specifying the
+\fBpcre2_set_compile_recursion_guard()\fP, a function that is provided to
-return code to be given from an external function that is passed to PCRE2 and
+enable stack availability to be checked during compilation (see the
 used for stack checking during compilation (see the
 .\" HREF
 \fBpcre2api\fP
 .\"
-documentation for details). FIXME: this needs doing properly once the test is 
+documentation for details). If the number specified by the modifier is greater
-implemented. Mention nested parens limit.
+than zero, \fBpcre2_set_compile_recursion_guard()\fP is called to set up
 callback from \fBpcre2_compile()\fP to a local function. The argument it is
 passed is the current nesting parenthesis depth; if this is greater than the
 value given by the modifier, non-zero is returned, causing the compilation to
 be aborted.
 .
 .
 .SS "Using alternative character tables"
@ -1210,6 +1229,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 05 July 2014
+Last updated: 22 July 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi
--- a/perltest.pl
+++ b/perltest.pl
@ -0,0 +1,247 @@
 #! /usr/bin/env perl
 # Program for testing regular expressions with perl to check that PCRE2 handles
 # them the same. This version needs to have "use utf8" at the start for running
 # the UTF-8 tests, but *not* for the other tests. The only way I've found for
 # doing this is to cat this line in explicitly in the RunPerlTest script. I've
 # also used this method to supply "require Encode" for the UTF-8 tests, so that
 # the main test will still run where Encode is not installed.
 #use utf8;
 #require Encode;
 # Function for turning a string into a string of printing chars.
 sub pchars {
 # Return a copy of $_[0] in which every character outside the printable ASCII
 # range 32..126 is replaced by a hex escape: \x{hh} when the global $utf8 is
 # set (the string is then handled as a list of code points), \xhh otherwise.
 my $out = "";
 if ($utf8)
   {
   @p = unpack('U*', $_[0]);
   foreach my $cp (@p)
     {
     $out .= ($cp >= 32 && $cp < 127)? chr($cp) : sprintf("\\x{%02x}", $cp);
     }
   }
 else
   {
   foreach my $ch (split(//, $_[0]))
     {
     my $code = ord $ch;
     $out .= ($code >= 32 && $code < 127)? $ch : sprintf("\\x%02x", $code);
     }
   }
 return $out;
 }
 # Read lines from named file or stdin and write to named file or stdout; lines
 # consist of a regular expression, in delimiters and optionally followed by
 # options, followed by a set of test data, terminated by an empty line.
 # Sort out the input and output files
 # The handles are held by name in $infile/$outfile; later code compares
 # $infile with "STDIN" to decide whether to issue prompts or echo the input.
 # With no arguments, read stdin and write stdout; the first argument names the
 # input file and the second the output file.
 $infile = "STDIN";
 if (@ARGV > 0)
   {
   open(INFILE, "<$ARGV[0]") or die "Failed to open $ARGV[0]\n";
   $infile = "INFILE";
   }
 $outfile = "STDOUT";
 if (@ARGV > 1)
   {
   open(OUTFILE, ">$ARGV[1]") or die "Failed to open $ARGV[1]\n";
   $outfile = "OUTFILE";
   }
 # Heading line giving the version of the Perl that is running the tests.
 printf $outfile "Perl $] Regular Expressions\n\n";
 # Main loop
 # Main loop. Each iteration reads one delimited pattern (which may span
 # several lines), strips or converts the pcre2test modifiers that Perl does not
 # understand, checks that the pattern compiles, and then applies it to each
 # following data line up to the next empty line, printing the results.
 NEXT_RE:
 for (;;)
  {
  printf "  re> " if $infile eq "STDIN";
  last if ! ($_ = <$infile>);
  # NOTE(review): here and below the input text itself is used as the printf
  # format, so a '%' in a pattern or data line is taken as a conversion.
  printf $outfile "$_" if $infile ne "STDIN";
  next if ($_ =~ /^\s*$/ || $_ =~ /^#/);
  $pattern = $_;
  # Keep reading until the closing delimiter has been seen.
  while ($pattern !~ /^\s*(.).*\1/s)
    {
    printf "    > " if $infile eq "STDIN";
    last if ! ($_ = <$infile>);
    printf $outfile "$_" if $infile ne "STDIN";
    $pattern .= $_;
    }
  chomp($pattern);
  $pattern =~ s/\s+$//;
  # Split the pattern from the modifiers and adjust them as necessary.
  $pattern =~ /^\s*((.).*\2)(.*)$/s;
  $pat = $1;
  $mod = $3;
  # "allaftertext" is used by pcre2test to print remainders after captures.
  # It must be removed before "aftertext" is looked for, because otherwise the
  # shorter name matches inside it and leaves "all" behind as Perl flags.
  $mod =~ s/allaftertext,?//;
  # The private "aftertext" modifier means "print $' afterwards".
  $showrest = ($mod =~ s/aftertext,?//);
  # Detect utf
  $utf8 = $mod =~ s/utf,?//;
  # Remove "dupnames".
  $mod =~ s/dupnames,?//;
  # Remove "mark" (asks pcre2test to check MARK data)
  $mod =~ s/mark,?//;
  # "ucp" asks pcre2test to set PCRE2_UCP; change this to /u for Perl.
  # NOTE(review): the substitution below still looks for the old single-letter
  # W modifier rather than the word "ucp" - confirm which is intended.
  $mod =~ s/W(?=[a-zA-Z]*$)/u/;
  # Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2 optimizations)
  $mod =~ s/no_auto_possess,?//;
  $mod =~ s/no_start_optimize,?//;
  # Add back retained modifiers and check that the pattern is valid.
  $mod =~ s/,//g;
  $pattern = "$pat$mod";
  eval "\$_ =~ ${pattern}";
  if ($@)
    {
    printf $outfile "Error: $@";
    # When reading from a file, skip the data lines that belong to the failed
    # pattern. The handle names are strings, so they must be compared with
    # "ne"; a numeric "!=" sees both sides as 0 and is never true.
    if ($infile ne "STDIN")
      {
      for (;;)
        {
        last if ! ($_ = <$infile>);
        last if $_ =~ /^\s*$/;
        }
      }
    next NEXT_RE;
    }
  # If the /g modifier is present, we want to put a loop round the matching;
  # otherwise just a single "if".
  $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
  # If the pattern is actually the null string, Perl uses the most recently
  # executed (and successfully compiled) regex instead. This is a nasty trap
  # for the unwary! The PCRE2 test suite does contain null strings in places -
  # if they are allowed through here all sorts of weird and unexpected effects
  # happen. To avoid this, we replace such patterns with a non-null pattern
  # that has the same effect.
  $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
  # Read data lines and test them
  for (;;)
    {
    printf "data> " if $infile eq "STDIN";
    last NEXT_RE if ! ($_ = <$infile>);
    chomp;
    printf $outfile "$_\n" if $infile ne "STDIN";
    s/\s+$//;  # Remove trailing space
    s/^\s+//;  # Remove leading space
    s/\\Y//g;  # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
    last if ($_ eq "");
    $x = eval "\"$_\"";   # To get escapes processed
    # Empty array for holding results, ensure $REGERROR and $REGMARK are
    # unset, then do the matching. Each successful match pushes 18 values:
    # $&, $1..$16, and $'.
    @subs = ();
    $pushes = "push \@subs,\$&;" .
         "push \@subs,\$1;" .
         "push \@subs,\$2;" .
         "push \@subs,\$3;" .
         "push \@subs,\$4;" .
         "push \@subs,\$5;" .
         "push \@subs,\$6;" .
         "push \@subs,\$7;" .
         "push \@subs,\$8;" .
         "push \@subs,\$9;" .
         "push \@subs,\$10;" .
         "push \@subs,\$11;" .
         "push \@subs,\$12;" .
         "push \@subs,\$13;" .
         "push \@subs,\$14;" .
         "push \@subs,\$15;" .
         "push \@subs,\$16;" .
         "push \@subs,\$'; }";
    undef $REGERROR;
    undef $REGMARK;
    eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
    if ($@)
      {
      printf $outfile "Error: $@\n";
      next NEXT_RE;
      }
    elsif (scalar(@subs) == 0)
      {
      printf $outfile "No match";
      if (defined $REGERROR && $REGERROR != 1)
        { printf $outfile (", mark = %s", &pchars($REGERROR)); }
      printf $outfile "\n";
      }
    else
      {
      # Print one group of 18 saved values for each match that was found.
      while (scalar(@subs) != 0)
        {
        printf $outfile (" 0: %s\n", &pchars($subs[0]));
        printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
        $last_printed = 0;
        for ($i = 1; $i <= 16; $i++)
          {
          if (defined $subs[$i])
            {
            # Show any unset groups that come before this set one.
            while ($last_printed++ < $i-1)
              { printf $outfile ("%2d: <unset>\n", $last_printed); }
            printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
            $last_printed = $i;
            }
          }
        splice(@subs, 0, 18);
        }
      # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
      # set and the input pattern was a UTF-8 string. We can, however, force
      # it to be so marked.
      if (defined $REGMARK && $REGMARK != 1)
        {
        $xx = $REGMARK;
        $xx = Encode::decode_utf8($xx) if $utf8;
        printf $outfile ("MK: %s\n", &pchars($xx));
        }
      }
    }
  }
 # printf $outfile "\n";
 # End
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -561,7 +561,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
       ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, 
       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, 
       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, 
-       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77 }; 
+       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78 }; 
 /* This is a table of start-of-pattern options such as (*UTF) and settings such
 as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -1703,10 +1703,10 @@ else
      ptr += 4;
      if (utf)
        {
-        if (c > 0x10ffffU) *errorcodeptr = ERR76;
+        if (c > 0x10ffffU) *errorcodeptr = ERR77;
          else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;  
        }
-      else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR76; 
+      else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; 
      }
    break;
@ -1815,12 +1815,11 @@ else
    recommended to avoid the ambiguities in the old syntax.
    Outside a character class, the digits are read as a decimal number. If the
-    number is less than 8 (used to be 10), or if there are that many previous
+    number is less than 10, or if there are that many previous extracting left
-    extracting left brackets, then it is a back reference. Otherwise, up to
+    brackets, it is a back reference. Otherwise, up to three octal digits are
-    three octal digits are read to form an escaped byte. Thus \123 is likely to
+    read to form an escaped byte. Thus \123 is likely to be octal 123 (cf
-    be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If
+    \0123, which is octal 012 followed by the literal 3). If the octal value is
-    the octal value is greater than 377, the least significant 8 bits are
+    greater than 377, the least significant 8 bits are taken.
    taken. \8 and \9 are treated as the literal characters 8 and 9.
    Inside a character class, \ followed by a digit is always either a literal
    8 or 9 or an octal number. */
@ -1832,7 +1831,7 @@ else
      {
      oldptr = ptr;
      /* The integer range is limited by the machine's int representation. */
-      s = (int)(c -CHAR_0);
+      s = (int)(c - CHAR_0);
      overflow = FALSE;
      while (IS_DIGIT(ptr[1]))
        {
@ -1849,7 +1848,7 @@ else
        *errorcodeptr = ERR61;
        break;
        }
-      if (s < 8 || s <= cb->bracount)  /* Check for back reference */
+      if (s < 10 || s <= cb->bracount)  /* Check for back reference */
        {
        escape = -s;
        break;
@ -1886,7 +1885,7 @@ else
    case CHAR_o:
    if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else
-    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR77; else 
+    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else 
      {
      ptr += 2;
      c = 0;
@ -1947,7 +1946,7 @@ else
        ptr += 2;
        if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
          {
-          *errorcodeptr = ERR77;
+          *errorcodeptr = ERR78;
          break;
          }    
        c = 0;
@ -1955,12 +1954,12 @@ else
        while ((cc = XDIGIT(*ptr)) != 0xff)
          {
          ptr++;
          if (c == 0 && cc == 0) continue;   /* Leading zeroes */
 #if PCRE2_CODE_UNIT_WIDTH == 32
          if (c >= 0x10000000l) { overflow = TRUE; break; }
 #endif
          c = (c << 4) | cc;
          ptr++;
          if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR))
            {
            overflow = TRUE;
@ -2002,9 +2001,9 @@ else
    break;
    /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped.
-    An error is given if the byte following \c is not an ASCII character. This
+    An error is given if the byte following \c is not a printable ASCII
-    coding is ASCII-specific, but then the whole concept of \cx is
+    character. This coding is ASCII-specific, but then the whole concept of \cx
-    ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
+    is ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
    case CHAR_c:
    c = *(++ptr);
@ -2014,7 +2013,7 @@ else
      break;
      }
 #ifndef EBCDIC    /* ASCII/UTF-8 coding */
-    if (c > 127)  /* Excludes all non-ASCII in either mode */
+    if (c < 32 || c > 126)  /* Excludes all non-printable ASCII */
      {
      *errorcodeptr = ERR68;
      break;
@ -3820,7 +3819,7 @@ for (;; ptr++)
          {
          ptr += 2;
          if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
-            { ptr += 2; goto CONTINUE_CLASS; }
+            { ptr += 2; continue; }
          inescq = TRUE;
          break;
          }
@ -4981,7 +4980,7 @@ for (;; ptr++)
        arglen = (int)(ptr - arg);
        if ((unsigned int)arglen > MAX_MARK)
          {
-          *errorcodeptr = ERR75;
+          *errorcodeptr = ERR76;
          goto FAILED;
          }
        }
@ -6548,10 +6547,9 @@ Returns:            TRUE on success
 static BOOL
 compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr,
  int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipunits,
-  int cond_depth,
+  int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
-  uint32_t *firstcuptr, int32_t *firstcuflagsptr,
+  uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr, 
-  uint32_t *reqcuptr, int32_t *reqcuflagsptr,
+  compile_block *cb, size_t *lengthptr)
  branch_chain *bcptr, compile_block *cb, size_t *lengthptr)
 {
 PCRE2_SPTR ptr = *ptrptr;
 PCRE2_UCHAR *code = *codeptr;
@ -6569,15 +6567,13 @@ unsigned int orig_bracount;
 unsigned int max_bracount;
 branch_chain bc;
 #ifdef FIXME
 /* If set, call the external function that checks for stack availability. */
-if (ccontext->stack_guard != NULL && ccontext->stack_guard(0))
+if (cb->cx->stack_guard != NULL && cb->cx->stack_guard(cb->parens_depth))
  {
  *errorcodeptr= ERR33;
  return FALSE;
  }
 #endif   
 /* Miscellaneous initialization */
@ -7434,7 +7430,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
          if (c > UINT32_MAX / 10 - 1) break;   /* Integer overflow */
          c = c*10 + ptr[pp++] - CHAR_0;
          }
-        if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) goto END_PSO;
+        if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) 
          {
          errorcode = ERR60; 
          goto HAD_ERROR;
          } 
        if (p->type == PSO_LIMM) limit_match = c;
          else limit_recursion = c;
        skipatstart += pp - skipatstart;
@ -7443,12 +7443,11 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
      break;   /* Out of the table scan loop */   
      }
    }
-  if (i > sizeof(pso_list)/sizeof(pso)) break;   /* Out of pso loop */
+  if (i >= sizeof(pso_list)/sizeof(pso)) break;   /* Out of pso loop */
  }
 /* End of pattern-start options; advance to start of real regex. */
 END_PSO:
 ptr += skipatstart;
 /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
@ -7477,6 +7476,15 @@ if (utf)
    goto HAD_ERROR;
  }   
 /* Check UCP lockout. */
 if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) == 
    (PCRE2_UCP|PCRE2_NEVER_UCP))
  {
  errorcode = ERR75;
  goto HAD_ERROR;
  }       
 /* Process the BSR setting. */
 if (bsr == 0) bsr = ccontext->bsr_convention;
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@ -148,15 +148,16 @@ static const char compile_error_texts[] =
  "different names for subpatterns of the same number are not allowed\0"
  "(*MARK) must have an argument\0"
  "non-hex character in \\x{} (closing brace missing?)\0"
-  "\\c must be followed by an ASCII character\0"
+  "\\c must be followed by a printable ASCII character\0"
  "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
  /* 70 */
  "internal error: unknown opcode in find_fixedlength()\0"
  "\\N is not supported in a class\0"
  "too many forward references\0"
  "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
-  "using (*UTF) is disabled by the application\0"
+  "using UTF is disabled by the application\0"
  /* 75 */
  "using UCP is disabled by the application\0"
  "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
  "character code point value in \\u.... sequence is too large\0"
  "digits missing in \\x{} or \\o{}\0" 
@ -223,7 +224,7 @@ static const char match_error_texts[] =
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
-  "unknown substring\0" 
+  "unknown or unset substring\0" 
  /* 50 */ 
  "NULL argument passed\0"
  "nested recursion at the same subject position\0"
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@ -6782,6 +6782,12 @@ ENDLOOP:
 release_match_heapframes(&frame_zero, mb);
 #endif
 /* Fill in fields that are always returned in the match data. */
 match_data->code = re;
 match_data->subject = subject;
 match_data->mark = mb->mark;
 /* Handle a fully successful match. */
 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
@ -6842,25 +6848,26 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
    match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
    }
-  /* Fill in the remaining fields that are returned in the match data. */
+  /* Set the remaining returned values */
  match_data->code = re;
  match_data->subject = subject;
  match_data->leftchar = mb->start_used_ptr - subject;
  match_data->rightchar = 0;  /* FIXME */
  match_data->startchar = start_match - subject;
  match_data->mark = mb->mark;
  return match_data->rc;
  }
 /* Control gets here if there has been a partial match, an error, or if the
-overall match attempt has failed at all permitted starting positions. For
+overall match attempt has failed at all permitted starting positions. Any mark 
-anything other than nomatch or partial match, just return the code. */
+data is in the nomatch_mark field. */
 match_data->mark = mb->nomatch_mark;
 /* For anything other than nomatch or partial match, just return the code. */
 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL)
  match_data->rc = rc;
-/* Handle a partial match. */
+/* Else handle a partial match. */
 else if (match_partial != NULL)
  {
@ -6870,16 +6877,16 @@ else if (match_partial != NULL)
    match_data->ovector[1] = end_subject - subject;
    }
  match_data->leftchar = start_partial - subject;
  match_data->rightchar = 0;  /* FIXME */
  match_data->startchar = match_partial - subject;
  match_data->rc = PCRE2_ERROR_PARTIAL;
  }
-/* This is the classic nomatch case. */
+/* Else this is the classic nomatch case. */
-else
+else match_data->rc = PCRE2_ERROR_NOMATCH;
-  {
+
-  match_data->rc = PCRE2_ERROR_NOMATCH;
+/* Free any temporary offsets. */
  match_data->mark = mb->nomatch_mark;
  }
 if (using_temporary_offsets)
  mb->memctl.free(mb->ovector, mb->memctl.memory_data);
--- a/src/pcre2_substring.c
+++ b/src/pcre2_substring.c
@ -119,6 +119,7 @@ size_t left, right;
 size_t p = 0;
 PCRE2_SPTR subject = match_data->subject;
 if (stringnumber >= match_data->oveccount ||
    stringnumber > match_data->code->top_bracket ||
    (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
  return PCRE2_ERROR_NOSUBSTRING;
 right = match_data->ovector[stringnumber*2+1];
@ -203,6 +204,7 @@ PCRE2_UCHAR *yield;
 PCRE2_SPTR subject = match_data->subject;
 if (stringnumber >= match_data->oveccount ||
    stringnumber > match_data->code->top_bracket ||
    (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
  return PCRE2_ERROR_NOSUBSTRING;
 right = match_data->ovector[stringnumber*2+1];
@ -293,6 +295,7 @@ pcre2_substring_length_bynumber(pcre2_match_data *match_data,
  int stringnumber)
 {
 if (stringnumber >= match_data->oveccount ||
    stringnumber > match_data->code->top_bracket ||
    match_data->ovector[stringnumber*2] == PCRE2_UNSET)
  return PCRE2_ERROR_NOSUBSTRING;
 return match_data->ovector[stringnumber*2 + 1] -
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -46,7 +46,6 @@ POSSIBILITY OF SUCH DAMAGE.
 . save code and #load
 . JIT - compile, time, verify
 . memory handling testing
 . stackguard testing
 */
@ -435,7 +434,7 @@ static modstruct modlist[] = {
  { "dfa_shortest",        MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,        DO(options) },
  { "dollar_endonly",      MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,      PO(options) },
  { "dotall",              MOD_PATP, MOD_OPT, PCRE2_DOTALL,              PO(options) },
-  { "dupnames",            MOD_PAT,  MOD_OPT, PCRE2_DUPNAMES,            PO(options) },
+  { "dupnames",            MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,            PO(options) },
  { "extended",            MOD_PATP, MOD_OPT, PCRE2_EXTENDED,            PO(options) },
  { "find_limits",         MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,            DO(control) },
  { "firstline",           MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,           PO(options) },
@ -612,6 +611,7 @@ clock_t total_compile_time = 0;
 clock_t total_match_time = 0;
 static uint32_t dfa_matched;
 static uint32_t forbid_utf = 0;
 static uint32_t max_oveccount;
 static uint32_t callout_count;
@ -831,6 +831,14 @@ are supported. */
  else \
    pcre2_set_character_tables_32(G(a,32),b)
 /* Call the pcre2_set_compile_recursion_guard function of the library width
 selected by test_mode, passing it the width-specific form of a (made with G)
 and b. The expansion is an if/else chain whose last statement has no
 terminating semicolon, so the caller must supply one. */
 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_compile_recursion_guard_8(G(a,8),b); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_compile_recursion_guard_16(G(a,16),b); \
  else \
    pcre2_set_compile_recursion_guard_32(G(a,32),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_match_limit_8(G(a,8),b); \
@ -1102,6 +1110,12 @@ the three different cases. */
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
 /* Two-width form: BITONE and BITTWO name the two widths in use. The function
 for BITONE is called when test_mode selects it, otherwise the one for BITTWO.
 The last statement has no terminating semicolon, so the caller must supply
 one. */
 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
@ -1245,8 +1259,10 @@ the three different cases. */
 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
 #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
 #define PCRE2_SET_CALLOUT(a,b,c) \
-  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c);
+  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *))b,c)
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
  pcre2_set_compile_recursion_guard_8(G(a,8),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
@ -1304,12 +1320,14 @@ the three different cases. */
 /* 16-bit only: call pcre2_set_callout_16 with b cast to the 16-bit callout
 function type. The expansion deliberately has no trailing semicolon; the
 caller writes it, so the macro can be used as the body of an unbraced if that
 is followed by else. */
 #define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *))b,c)
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
  pcre2_set_compile_recursion_guard_16(G(a,16),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
-  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e);
+  a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
-  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e);
+  a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d) \
  a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d)
@ -1361,10 +1379,12 @@ the three different cases. */
 /* 32-bit only: call pcre2_set_callout_32 with b cast to the 32-bit callout
 function type. The expansion deliberately has no trailing semicolon; the
 caller writes it, so the macro can be used as the body of an unbraced if that
 is followed by else. */
 #define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *))b,c)
 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
  pcre2_set_compile_recursion_guard_32(G(a,32),b)
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
-  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e);
+  a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
 /* 32-bit only: assign to a the result of pcre2_substring_copy_bynumber_32,
 with d cast to a 32-bit code unit pointer. The expansion deliberately has no
 trailing semicolon; the caller writes it, so the macro can be used as the
 body of an unbraced if that is followed by else. */
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
@ -1766,6 +1786,25 @@ free(block);
 #endif  /* NO_RECURSE */
 /*************************************************
 *       Callback function for stack guard        *
 *************************************************/
 /* This is set up to be called from pcre2_compile() when the stackguard=n
 modifier sets a value greater than zero. The test we do is whether the 
 parenthesis nesting depth is greater than the value set by the modifier.
 Argument:  the current parenthesis nesting depth
 Returns:   non-zero to kill the compilation
 */
 static int
 stack_guard(uint32_t depth)
 {
 /* Non-zero stops the compilation: the nesting depth passed in is greater than
 the value held in pat_patctl.stackguard_test. */
 if (depth > pat_patctl.stackguard_test) return 1;
 return 0;
 }
 /*************************************************
 *      Convert UTF-8 character to code point     *
 *************************************************/
@ -2031,16 +2070,16 @@ return i + 1;
 #ifdef SUPPORT_PCRE16
 /*************************************************
-*         Convert a string to 16-bit             *
+*          Convert pattern to 16-bit             *
 *************************************************/
-/* The input is always interpreted as a string of UTF-8 bytes. If all the input
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
-bytes are ASCII, the space needed for a 16-bit string is exactly double the
+all the input bytes are ASCII, the space needed for a 16-bit string is exactly
-8-bit size. Otherwise, the size needed for a 16-bit string is no more than
+double the 8-bit size. Otherwise, the size needed for a 16-bit string is no
-double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
+more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
-in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
+possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
-result is always left in pbuffer16. Impose a minimum size to save repeated
+UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
-re-sizing.
+repeated re-sizing.
 Note that this function does not object to surrogate values. This is
 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
@ -2074,9 +2113,13 @@ if (pbuffer16_size < 2*len + 2)
    exit(1);
    }
  }
 pp = pbuffer16;
-while (len > 0)
+pp = pbuffer16;
 if (!utf)
  {
  while (len-- > 0) *pp++ = *p++;
  }  
 else while (len > 0)
  {
  uint32_t c;
  int chlen = utf82ord(p, &c);
@ -2102,15 +2145,15 @@ return pp - pbuffer16;
 #ifdef SUPPORT_PCRE32
 /*************************************************
-*         Convert a string to 32-bit             *
+*          Convert pattern to 32-bit             *
 *************************************************/
-/* The input is always interpreted as a string of UTF-8 bytes. If all the input
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
-bytes are ASCII, the space needed for a 32-bit string is exactly four times the
+all the input bytes are ASCII, the space needed for a 32-bit string is exactly
-8-bit size. Otherwise, the size needed for a 32-bit string is no more than four
+four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no
-times, because the number of characters must be less than the number of bytes.
+more than four times, because the number of characters must be less than the
-The result is always left in pbuffer32. Impose a minimum size to save repeated
+number of bytes. The result is always left in pbuffer32. Impose a minimum size
-re-sizing.
+to save repeated re-sizing.
 Note that this function does not object to surrogate values. This is
 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
@ -2143,9 +2186,13 @@ if (pbuffer32_size < 4*len + 4)
    exit(1);
    }
  }
 pp = pbuffer32;
-while (len > 0)
+pp = pbuffer32;
 if (!utf)
  {
  while (len-- > 0) *pp++ = *p++;
  }  
 else while (len > 0)
  {
  uint32_t c;
  int chlen = utf82ord(p, &c);
@ -3021,8 +3068,25 @@ if ((pat_patctl.control & CTL_INFO) != 0)
  pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
  pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
  /* Remove UTF/UCP if they were there only because of forbid_utf. This saves 
  cluttering up the verification output of non-UTF test files. */
  if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UTF; 
    overall_options &= ~PCRE2_NEVER_UTF; 
    }  
  if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
    {
    compile_options &= ~PCRE2_NEVER_UCP; 
    overall_options &= ~PCRE2_NEVER_UCP; 
    }  
  if ((compile_options|overall_options) == 0)
    fprintf(outfile, "No options\n");
  else if (compile_options == overall_options)
    show_compile_options(compile_options, "Options:", "\n");    
  else
    {
    show_compile_options(compile_options, "Compile options:", "\n");
@ -3035,26 +3099,26 @@ if ((pat_patctl.control & CTL_INFO) != 0)
    fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
      "any Unicode newline" : "CR, LF, or CRLF");
-  switch (newline_convention)
+  if (newline_convention != NEWLINE_DEFAULT) switch (newline_convention)
    {
    case PCRE2_NEWLINE_CR:
-    fprintf(outfile, "Newline is CR\n");
+    fprintf(outfile, "Forced newline is CR\n");
    break;
    case PCRE2_NEWLINE_LF:
-    fprintf(outfile, "Newline is LF\n");
+    fprintf(outfile, "Forced newline is LF\n");
    break;
    case PCRE2_NEWLINE_CRLF:
-    fprintf(outfile, "Newline is CRLF\n");
+    fprintf(outfile, "Forced newline is CRLF\n");
    break;
    case PCRE2_NEWLINE_ANYCRLF:
-    fprintf(outfile, "Newline is CR, LF, or CRLF\n");
+    fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
    break;
    case PCRE2_NEWLINE_ANY:
-    fprintf(outfile, "Newline is any Unicode newline\n");
+    fprintf(outfile, "Forced newline is any Unicode newline\n");
    break;
    default:
@ -3063,7 +3127,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
  if (first_ctype == 2)
    {
-    fprintf(outfile, "First char at start or follows newline\n");
+    fprintf(outfile, "First code unit at start or follows newline\n");
    }
  else if (first_ctype == 1)
    {
@ -3079,35 +3143,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
      fprintf(outfile, "%s\n", caseless);
      }
    }
-  else
+  else if (start_bits != NULL)
    {
    fprintf(outfile, "No first code unit\n");
    }
  if (last_ctype == 0)
    {
    fprintf(outfile, "No last code unit\n");
    }
  else
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(last_cunit))
      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
    else
      {
      fprintf(outfile, "Last code unit = ");
      pchar(last_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }
  fprintf(outfile, "Subject length lower bound = %d\n", minlength);
  if (start_bits == NULL)
    fprintf(outfile, "No starting code unit list\n");
  else
    {
    int i;
    int c = 24;
@ -3135,6 +3171,31 @@ if ((pat_patctl.control & CTL_INFO) != 0)
      }
    fprintf(outfile, "\n");
    }
  else
    {
    fprintf(outfile, "No first code unit\n");
    }
  if (last_ctype == 0)
    {
    fprintf(outfile, "No last code unit\n");
    }
  else
    {
    const char *caseless =
      ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
      "" : " (caseless)";
    if (PRINTOK(last_cunit))
      fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
    else
      {
      fprintf(outfile, "Last code unit = ");
      pchar(last_cunit, FALSE, outfile);
      fprintf(outfile, "%s\n", caseless);
      }
    }
  fprintf(outfile, "Subject length lower bound = %d\n", minlength);
 /* FIXME: tidy this up */
@ -3183,7 +3244,11 @@ if (restrict_for_perl_test)
  return PR_ABEND;
  }
-if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
+if (strncmp((char *)buffer, "#forbid_utf", 11) == 0 && isspace(buffer[11]))
  {
  forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; 
  } 
 else if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
  {
  (void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL);
  }
@ -3491,6 +3556,13 @@ else switch (pat_patctl.tables_id)
 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
 /* Set up for the stackguard test. */
 if (pat_patctl.stackguard_test != 0) 
  {
  PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard);
  } 
 /* Handle compiling via the POSIX interface, which doesn't support the
 timing, showing, or debugging options, nor the ability to pass over
 local character tables. Neither does it have 16-bit or 32-bit support. */
@ -3604,7 +3676,7 @@ if (timeit > 0)
  for (i = 0; i < timeit; i++)
    {
    PCRE2_COMPILE(compiled_code, pbuffer, patlen,
-      pat_patctl.options, &errorcode, &erroroffset, pat_context);
+      pat_patctl.options|forbid_utf, &errorcode, &erroroffset, pat_context);
    if (TEST(compiled_code, !=, NULL))
      { SUB1(pcre2_code_free, compiled_code); }
    }
@ -3618,8 +3690,8 @@ if (timeit > 0)
 /* A final compile that is used "for real". */
-PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options, &errorcode,
+PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf, 
-  &erroroffset, pat_context);
+  &errorcode, &erroroffset, pat_context);
 /* Compilation failed; go back for another re, skipping to blank line
 if non-interactive. */
@ -3782,14 +3854,12 @@ for (;;)
    min = mid;
    mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
    }
  else if (capcount >= 0 ||
           capcount == PCRE2_ERROR_NOMATCH ||
           capcount == PCRE2_ERROR_PARTIAL)
    {
    if (mid == min + 1)
      {
      if (capcount != PCRE2_ERROR_NOMATCH)
      fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
      break;
      }
@ -4184,9 +4254,12 @@ while ((c = *p++) != 0)
    continue;
    default:
    if (isalnum(c))
      { 
      fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
      return PR_OK;
      } 
    }
  /* We now have a character value in c that may be greater than 255.
  In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
@ -4608,7 +4681,12 @@ for (gmatched = 0;; gmatched++)
      PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer,
        sizeof(copybuffer)/code_unit_size);
      if (rc < 0)
-        fprintf(outfile, "copy substring %d failed %d\n", n, rc);
+        { 
        fprintf(outfile, "copy substring %d failed (%d): ", n, rc);
        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
        fprintf(outfile, "\n");
        } 
      else
        {
        fprintf(outfile, "%2dC ", n);
@ -4641,7 +4719,10 @@ for (gmatched = 0;; gmatched++)
        copybuffer, sizeof(copybuffer)/code_unit_size);
      if (rc < 0)
        {
-        fprintf(outfile, "copy substring '%s' failed %d\n", nptr, rc);
+        fprintf(outfile, "copy substring '%s' failed (%d): ", nptr, rc);
        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
        fprintf(outfile, "\n");
        }
      else
        {
@ -4661,7 +4742,12 @@ for (gmatched = 0;; gmatched++)
      uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
      PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer);
      if (rc < 0)
-        fprintf(outfile, "get substring %d failed %d\n", n, rc);
+        { 
        fprintf(outfile, "get substring %d failed (%d): ", n, rc);
        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
        fprintf(outfile, "\n");
        } 
      else
        {
        fprintf(outfile, "%2dG ", n);
@ -4694,7 +4780,10 @@ for (gmatched = 0;; gmatched++)
      PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer);
      if (rc < 0)
        {
-        fprintf(outfile, "get substring '%s' failed %d\n", nptr, rc);
+        fprintf(outfile, "get substring '%s' failed (%d): ", nptr, rc);
        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
        fprintf(outfile, "\n");
        }
      else
        {
@ -4715,7 +4804,12 @@ for (gmatched = 0;; gmatched++)
      size_t *lengths;
      PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
      if (rc < 0)
-        fprintf(outfile, "get substring list failed %d\n", rc);
+        { 
        fprintf(outfile, "get substring list failed (%d): ", rc);
        PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
        PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
        fprintf(outfile, "\n");
        } 
      else
        {
        for (i = 0; i < capcount; i++)
@ -4737,7 +4831,6 @@ for (gmatched = 0;; gmatched++)
  else if (capcount == PCRE2_ERROR_PARTIAL)
    {
    PCRE2_OFFSET leftchar = FLD(match_data, leftchar);
    fprintf(outfile, "Partial match");
    if (leftchar != FLD(match_data, startchar))
      fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar));
@ -4880,8 +4973,8 @@ for (gmatched = 0;; gmatched++)
    else
      {
      pp += end_offset * code_unit_size;
-      len -= end_offset;
+      len -= end_offset * code_unit_size;
-      ulen -= end_offset *code_unit_size;
+      ulen -= end_offset;
      }
    }
  }  /* End of global loop */
@ -4894,7 +4987,7 @@ return PR_OK;
 /*************************************************
-*                Print PCRE version              *
+*               Print PCRE2 version              *
 *************************************************/
 /* The version string was read into 'version' at the start of execution. */
@ -4903,7 +4996,7 @@ static void
 print_version(FILE *f)
 {
 VERSION_TYPE *vp;
-fprintf(f, "PCRE version ");
+fprintf(f, "PCRE2 version ");
 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
 fprintf(f, "\n");
 }
@ -4976,6 +5069,7 @@ printf("  -d            set default pattern control 'debug'\n");
 printf("  -dfa          set default subject control 'dfa'\n");
 printf("  -help         show usage information\n");
 printf("  -i            set default pattern control 'info'\n");
 printf("  -jit          set default pattern control 'jit'\n");
 printf("  -q            quiet: do not output PCRE version number at start\n");
 printf("  -pattern <s>  set default pattern control fields\n");
 printf("  -subject <s>  set default subject control fields\n");
@ -5261,10 +5355,18 @@ while (argc > 1 && argv[op][0] == '-')
  /* Set some common pattern and subject controls */
  else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; 
  else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
  else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
  else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
-  else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; 
+  else if (strcmp(arg, "-jit") == 0)
    {
    def_patctl.jit = 7;  /* full & partial */ 
 #ifndef SUPPORT_JIT
    fprintf(stderr, "** Warning: JIT support is not available: "
                    "-jit calls dummy functions.\n");
 #endif     
    } 
  /* Set timing parameters */
@ -5503,7 +5605,8 @@ while (notdone)
    while (isspace(*p)) p++; 
    if (*p != 0)
      {
-      fprintf(stderr, "** Invalid pattern delimiter '%c'.\n", *buffer);
+      fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer, 
        *buffer);
      rc = PR_SKIP;
      }
    }
--- a/testdata/testinput1
+++ b/testdata/testinput1
--- a/testdata/testinput2
+++ b/testdata/testinput2
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
--- a/testdata/testoutput2
+++ b/testdata/testoutput2