Runtime UTF checks now take note of the starting offset.

2015-08-18 10:34:05 +00:00 · 2015-08-18 10:34:05 +00:00 · ee41aa906f
parent 1370a49dfe
commit ee41aa906f
30 changed files with 2077 additions and 1664 deletions
--- a/4
+++ b/4
@ -145,6 +145,10 @@ was fixed.
 39. Match limit check added to recursion. This issue was found by Karl Skomski
 with a custom LLVM fuzzer.
 40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look 
 only at the part of the subject that is relevant when the starting offset is 
 non-zero.
 Version 10.20 30-June-2015
 --------------------------
--- a/76
+++ b/76
@ -68,12 +68,13 @@ title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
 title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
 title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
 title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
-title14="Test 14: Non-JIT limits and other non-JIT tests"
+title14="Test 14: DFA specials for UTF and UCP support"
-title15="Test 15: JIT-specific features when JIT is not available"
+title15="Test 15: Non-JIT limits and other non-JIT tests"
-title16="Test 16: JIT-specific features when JIT is available"
+title16="Test 16: JIT-specific features when JIT is not available"
-title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP"
+title17="Test 17: JIT-specific features when JIT is available"
-title18="Test 18: Tests of the POSIX interface with UTF/UCP"
+title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP"
-title19="Test 19: Serialization tests"
+title19="Test 19: Tests of the POSIX interface with UTF/UCP"
 title20="Test 20: Serialization tests"
 maxtest=18
 if [ $# -eq 1 -a "$1" = "list" ]; then
@ -97,6 +98,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title17
  echo $title18
  echo $title19
  echo $title20
  exit 0
 fi
@ -219,6 +221,7 @@ do16=no
 do17=no
 do18=no
 do19=no
 do20=no
 while [ $# -gt 0 ] ; do
  case $1 in
@ -242,6 +245,7 @@ while [ $# -gt 0 ] ; do
   17) do17=yes;;
   18) do18=yes;;
   19) do19=yes;;
   20) do20=yes;;
   -8) arg8=yes;;
  -16) arg16=yes;;
  -32) arg32=yes;;
@ -387,7 +391,8 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
     $do4  = no -a $do5  = no -a $do6  = no -a $do7  = no -a \
     $do8  = no -a $do9  = no -a $do10 = no -a $do11 = no -a \
     $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
-     $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no \
+     $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
     $do20 = no \
   ]; then
  do0=yes
  do1=yes
@ -409,6 +414,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
  do17=yes
  do18=yes
  do19=yes
  do20=yes 
 fi
 # Handle any explicit skips at this stage, so that an argument list may consist
@ -689,71 +695,79 @@ for bmode in "$test8" "$test16" "$test32"; do
    fi
  fi
-  # Test non-JIT match and recursion limits
+  # Tests for DFA UTF and UCP features. Output is different for the different widths.
  if [ $do14 = yes ] ; then
    echo $title14
-    $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput14 testtry
+    $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput14 testtry
-    checkresult $? 14 ""
+    checkresult $? 14-$bits "$opt"
  fi
  # Test non-JIT match and recursion limits
  if [ $do15 = yes ] ; then
    echo $title15
    $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
    checkresult $? 15 ""
  fi
  # Test JIT-specific features when JIT is not available
  if [ $do15 = yes ] ; then
    echo $title15
    if [ $jit -ne 0 ] ; then
      echo "  Skipped because JIT is available"
    else
      $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
      checkresult $? 15 ""
    fi
  fi
  # Test JIT-specific features when JIT is available
  if [ $do16 = yes ] ; then
    echo $title16
-    if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
+    if [ $jit -ne 0 ] ; then
-      echo "  Skipped because JIT is not available or nojit was specified"
+      echo "  Skipped because JIT is available"
    else
      $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry
      checkresult $? 16 ""
    fi
  fi
-  # Tests for the POSIX interface without UTF/UCP (8-bit only)
+  # Test JIT-specific features when JIT is available
  if [ $do17 = yes ] ; then
    echo $title17
-    if [ "$bits" = "16" -o "$bits" = "32" ] ; then
+    if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
-      echo "  Skipped when running 16/32-bit tests"
+      echo "  Skipped because JIT is not available or nojit was specified"
    else
      $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry
      checkresult $? 17 ""
    fi
  fi
-  # Tests for the POSIX interface with UTF/UCP (8-bit only)
+  # Tests for the POSIX interface without UTF/UCP (8-bit only)
  if [ $do18 = yes ] ; then
    echo $title18
    if [ "$bits" = "16" -o "$bits" = "32" ] ; then
      echo "  Skipped when running 16/32-bit tests"
    elif [ $utf -eq 0 ] ; then
      echo "  Skipped because UTF-$bits support is not available"
    else
      $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry
      checkresult $? 18 ""
    fi
  fi
-  # Serialization tests
+  # Tests for the POSIX interface with UTF/UCP (8-bit only)
  if [ $do19 = yes ] ; then
    echo $title19
    if [ "$bits" = "16" -o "$bits" = "32" ] ; then
      echo "  Skipped when running 16/32-bit tests"
    elif [ $utf -eq 0 ] ; then
      echo "  Skipped because UTF-$bits support is not available"
    else
      $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry
      checkresult $? 19 ""
    fi
  fi
  # Serialization tests
  if [ $do20 = yes ] ; then
    echo $title20
    $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput20 testtry
    checkresult $? 20 ""
  fi
 # End of loop for 8/16/32-bit tests
 done
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "29 July 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "18 August 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -2022,12 +2022,19 @@ If the pattern is anchored, such a match can occur only if the pattern contains
 .sp
 When PCRE2_UTF is set at compile time, the validity of the subject as a UTF
 string is checked by default when \fBpcre2_match()\fP is subsequently called.
-The entire string is checked before any other processing takes place, and a
+If a non-zero starting offset is given, the check is applied only to that part
 of the subject that could be inspected during matching, and there is a check
 that the starting offset points to the first code unit of a character or to the
 end of the subject. If there are no lookbehind assertions in the pattern, the
 check starts at the starting offset. Otherwise, it starts at the length of the
 longest lookbehind before the starting offset, or at the start of the subject
 if there are not that many characters before the starting offset. Note that the
 sequences \eb and \eB are one-character lookbehinds.
 .P
 The check is carried out before any other processing takes place, and a
 negative error code is returned if the check fails. There are several UTF error
 codes for each code unit width, corresponding to different problems with the
-code unit sequence. The value of \fIstartoffset\fP is also checked, to ensure
+code unit sequence. There are discussions about the validity of
 that it points to the start of a character or to the end of the subject. There
 are discussions about the validity of
 .\" HTML <a href="pcre2unicode.html#utf8strings">
 .\" </a>
 UTF-8 strings,
@ -2939,6 +2946,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 29 July 2015
+Last updated: 18 August 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "23 November 2014" "PCRE2 10.00"
+.TH PCRE2UNICODE 3 "18 August 2015" "PCRE2 10.21"
 .SH NAME
 PCRE - Perl-compatible regular expressions (revised API)
 .SH "UNICODE AND UTF SUPPORT"
@ -117,11 +117,21 @@ UTF-16 and UTF-32 strings can indicate their endianness by special code knows
 as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
 strings to be in host byte order.
 .P
-The entire string is checked before any other processing takes place. In
+A UTF string is checked before any other processing takes place. In the case of 
-addition to checking the format of the string, there is a check to ensure that
+\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP calls with a non-zero starting 
-all code points lie in the range U+0 to U+10FFFF, excluding the surrogate area.
+offset, the check is applied only to that part of the subject that could be
-The so-called "non-character" code points are not excluded because Unicode
+inspected during matching, and there is a check that the starting offset points
-corrigendum #9 makes it clear that they should not be.
+to the first code unit of a character or to the end of the subject. If there
 are no lookbehind assertions in the pattern, the check starts at the starting
 offset. Otherwise, it starts at the length of the longest lookbehind before the
 starting offset, or at the start of the subject if there are not that many
 characters before the starting offset. Note that the sequences \eb and \eB are
 one-character lookbehinds.
 .P
 In addition to checking the format of the string, there is a check to ensure
 that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate
 area. The so-called "non-character" code points are not excluded because
 Unicode corrigendum #9 makes it clear that they should not be.
 .P
 Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
 where they are used in pairs to encode code points with values greater than
@ -252,6 +262,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 November 2014
+Last updated: 18 August 2015
-Copyright (c) 1997-2014 University of Cambridge.
+Copyright (c) 1997-2015 University of Cambridge.
 .fi
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -4682,7 +4682,7 @@ for (;; ptr++)
      that it's a length rather than a small character. */
 #ifdef MAYBE_UTF_MULTI
-      if (utf && NOT_FIRSTCHAR(code[-1]))
+      if (utf && NOT_FIRSTCU(code[-1]))
        {
        PCRE2_UCHAR *lastchar = code - 1;
        BACKCHAR(lastchar);
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@ -2774,7 +2774,7 @@ for (;;)
              {
              PCRE2_SPTR p = start_subject + local_offsets[rc];
              PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
-              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
              }
 #endif
            if (charcount > 0)
@ -2874,7 +2874,7 @@ for (;;)
            PCRE2_SPTR pp = local_ptr;
            charcount = (int)(pp - p);
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-            if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+            if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
 #endif
            ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
            }
@ -2960,7 +2960,7 @@ for (;;)
              {
              PCRE2_SPTR p = start_subject + local_offsets[0];
              PCRE2_SPTR pp = start_subject + local_offsets[1];
-              while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
+              while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
              }
 #endif
            ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
@ -3264,19 +3264,51 @@ switch(re->newline_convention)
 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
 we must also check that a starting offset does not point into the middle of a
-multiunit character. */
+multiunit character. We check only the portion of the subject that is going to 
 be inspected during matching - from the offset minus the maximum lookbehind 
 to the given length. This saves time when a small part of a large subject is 
 being matched by the use of a starting offset. Note that the maximum lookbehind 
 is a number of characters, not code units. */
 #ifdef SUPPORT_UNICODE
 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
  {
-  match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar));
+  PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
-  if (match_data->rc != 0) return match_data->rc;
+
  if (start_offset > 0)
    { 
 #if PCRE2_CODE_UNIT_WIDTH != 32
-  if (start_offset > 0 && start_offset < length &&
+    unsigned int i; 
-      NOT_FIRSTCHAR(subject[start_offset]))
+    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
      return PCRE2_ERROR_BADUTFOFFSET;
    for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
      {
      check_subject--;
      while (check_subject > subject &&
 #if PCRE2_CODE_UNIT_WIDTH == 8
      (*check_subject & 0xc0) == 0x80)
 #else  /* 16-bit */
      (*check_subject & 0xfc00) == 0xdc00)
 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
        check_subject--; 
      }  
 #else   /* In the 32-bit library, one code unit equals one character. */
    check_subject -= re->max_lookbehind;
    if (check_subject < subject) check_subject = subject; 
 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
    }
  /* Validate the relevant portion of the subject. After an error, adjust the
  offset to be an absolute offset in the whole string. */
  match_data->rc = PRIV(valid_utf)(check_subject, 
    length - (check_subject - subject), &(match_data->startchar));
  if (match_data->rc != 0) 
    {
    match_data->startchar += check_subject - subject;
    return match_data->rc;
    } 
  }
 #endif  /* SUPPORT_UNICODE */
 /* Set up the first code unit to match, if available. The first_codeunit value
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@ -72,7 +72,7 @@ just to undefine them all. */
 #undef MAX_MARK
 #undef MAX_PATTERN_SIZE
 #undef MAX_UTF_SINGLE_CU
-#undef NOT_FIRSTCHAR
+#undef NOT_FIRSTCU
 #undef PUT
 #undef PUT2
 #undef PUT2INC
@ -252,7 +252,7 @@ UTF support is omitted, we don't even define them. */
 /* #define MAX_UTF_SINGLE_CU */
 /* #define HAS_EXTRALEN(c) */
 /* #define GET_EXTRALEN(c) */
-/* #define NOT_FIRSTCHAR(c) */
+/* #define NOT_FIRSTCU(c) */
 #define GETCHAR(c, eptr) c = *eptr;
 #define GETCHARTEST(c, eptr) c = *eptr;
 #define GETCHARINC(c, eptr) c = *eptr++;
@ -285,10 +285,10 @@ Otherwise it has an undefined behaviour. */
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
-/* Returns TRUE, if the given character is not the first character
+/* Returns TRUE, if the given value is not the first code unit of a UTF
-of a UTF sequence. */
+sequence. */
-#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
+#define NOT_FIRSTCU(c) (((c) & 0xc0) == 0x80)
 /* Get the next UTF-8 character, not advancing the pointer. This is called when
 we know we are in UTF-8 mode. */
@ -371,10 +371,10 @@ Otherwise it has an undefined behaviour. */
 #define GET_EXTRALEN(c) 1
-/* Returns TRUE, if the given character is not the first character
+/* Returns TRUE, if the given value is not the first code unit of a UTF
-of a UTF sequence. */
+sequence. */
-#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
+#define NOT_FIRSTCU(c) (((c) & 0xfc00) == 0xdc00)
 /* Base macro to pick up the low surrogate of a UTF-16 character, not
 advancing the pointer. */
@ -469,7 +469,7 @@ into one PCRE2_UCHAR unit. */
 #define MAX_UTF_SINGLE_CU (0x10ffffu)
 #define HAS_EXTRALEN(c) (0)
 #define GET_EXTRALEN(c) (0)
-#define NOT_FIRSTCHAR(c) (0)
+#define NOT_FIRSTCU(c) (0)
 /* Get the next UTF-32 character, not advancing the pointer. This is called when
 we know we are in UTF-32 mode. */
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@ -6485,6 +6485,7 @@ mb->match_frames_base = &frame_zero;
 subject string. */
 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
 end_subject = subject + length;
 /* Plausibility checks */
@ -6536,19 +6537,51 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
 we must also check that a starting offset does not point into the middle of a
-multiunit character. */
+multiunit character. We check only the portion of the subject that is going to 
 be inspected during matching - from the offset minus the maximum lookbehind 
 to the given length. This saves time when a small part of a large subject is 
 being matched by the use of a starting offset. Note that the maximum lookbehind 
 is a number of characters, not code units. */
 #ifdef SUPPORT_UNICODE
 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
  {
-  match_data->rc = PRIV(valid_utf)(subject, length, &(match_data->startchar));
+  PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
-  if (match_data->rc != 0) return match_data->rc;
+
  if (start_offset > 0)
    { 
 #if PCRE2_CODE_UNIT_WIDTH != 32
-  if (start_offset > 0 && start_offset < length &&
+    unsigned int i; 
-      NOT_FIRSTCHAR(subject[start_offset]))
+    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
      return PCRE2_ERROR_BADUTFOFFSET;
    for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
      {
      check_subject--;
      while (check_subject > subject &&
 #if PCRE2_CODE_UNIT_WIDTH == 8
      (*check_subject & 0xc0) == 0x80)
 #else  /* 16-bit */
      (*check_subject & 0xfc00) == 0xdc00)
 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
        check_subject--; 
      }  
 #else   /* In the 32-bit library, one code unit equals one character. */
    check_subject -= re->max_lookbehind;
    if (check_subject < subject) check_subject = subject; 
 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
    }
  /* Validate the relevant portion of the subject. After an error, adjust the
  offset to be an absolute offset in the whole string. */
  match_data->rc = PRIV(valid_utf)(check_subject, 
    length - (check_subject - subject), &(match_data->startchar));
  if (match_data->rc != 0) 
    {
    match_data->startchar += check_subject - subject;
    return match_data->rc;
    } 
  }
 #endif  /* SUPPORT_UNICODE */
 /* If the pattern was successfully studied with JIT support, run the JIT
@ -6594,7 +6627,7 @@ else
 mb->start_subject = subject;
 mb->start_offset = start_offset;
-mb->end_subject = end_subject = mb->start_subject + length;
+mb->end_subject = end_subject;
 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
 mb->moptions = options;                 /* Match options */
--- a/testdata/testinput10
+++ b/testdata/testinput10
@ -133,6 +133,35 @@
    \xfc\x84\x80\x80\x80\x80\=no_utf_check
    \xfd\x83\x80\x80\x80\x80\=no_utf_check
 # Similar tests with offsets
 /badutf/utf
    X\xdfabcd
    X\xdfabcd\=offset=1
    X\xdfabcd\=offset=2
 /(?<=x)badutf/utf
    X\xdfabcd
    X\xdfabcd\=offset=1
    X\xdfabcd\=offset=2
    X\xdfabcd\=offset=3
    X\xdfabcd\xdf\=offset=3
 /(?<=xx)badutf/utf
    X\xdfabcd
    X\xdfabcd\=offset=1
    X\xdfabcd\=offset=2
    X\xdfabcd\=offset=3
 /(?<=xxxx)badutf/utf
    X\xdfabcd
    X\xdfabcd\=offset=1
    X\xdfabcd\=offset=2
    X\xdfabcd\=offset=3
    X\xdfabcd\=offset=6
    X\xdfabc\xdf\=offset=6
    X\xdfabc\xdf\=offset=7
 /\x{100}/IB,utf
 /\x{1000}/IB,utf
--- a/testdata/testinput12
+++ b/testdata/testinput12
@ -158,6 +158,7 @@
 /X/utf
    XX\x{d800}
    XX\x{d800}\=offset=3
    XX\x{d800}\=no_utf_check
    XX\x{da00}
    XX\x{da00}\=no_utf_check
@ -170,6 +171,9 @@
    XX\x{110000}
    XX\x{d800}\x{1234}
 /(?<=.)X/utf
    XX\x{d800}\=offset=3
 /(*UTF16)\x{11234}/
  abcd\x{11234}pqr
--- a/testdata/testinput14
+++ b/testdata/testinput14
@ -1,155 +1,37 @@
-# These are:
+# These test special (mostly error) UTF features of DFA matching. They are a 
-#
+# selection of the more comprehensive tests that are run for non-DFA matching.
-# (1) Tests of the match-limiting features. The results are different for
+# The output is different for the different widths.
 # interpretive or JIT matching, so this test should not be run with JIT. The
 # same tests are run using JIT in test 16.
-# (2) Other tests that must not be run with JIT.
+#subject dfa
-/(a+)*zz/I
+/X/utf
-  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
+    XX\x{d800}
-  aaaaaaaaaaaaaz\=find_limits
+    XX\x{d800}\=offset=3
    XX\x{d800}\=no_utf_check
    XX\x{da00}
    XX\x{da00}\=no_utf_check
    XX\x{dc00}
    XX\x{dc00}\=no_utf_check
    XX\x{de00}
    XX\x{de00}\=no_utf_check
    XX\x{dfff}
    XX\x{dfff}\=no_utf_check
    XX\x{110000}
    XX\x{d800}\x{1234}
-!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
+/badutf/utf
-   /* this is a C style comment */\=find_limits
+    X\xdf
    XX\xef
    XXX\xef\x80
    X\xf7
    XX\xf7\x80
    XXX\xf7\x80\x80
-/^(?>a)++/
+/shortutf/utf
-    aa\=find_limits
+    XX\xdf\=ph
-    aaaaaaaaa\=find_limits
+    XX\xef\=ph
-    
+    XX\xef\x80\=ph
-/(a)(?1)++/
+    \xf7\=ph
-    aa\=find_limits
+    \xf7\x80\=ph
    aaaaaaaaa\=find_limits
 /a(?:.)*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 /a(?:.(*THEN))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 /a(?:.(*THEN:ABC))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
     aabbccddee\=find_limits
 /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
     aabbccddee\=find_limits
 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
     aabbccddee\=find_limits
 /(*LIMIT_MATCH=12bc)abc/
 /(*LIMIT_MATCH=4294967290)abc/
 /(*LIMIT_RECURSION=4294967280)abc/I
 /(a+)*zz/
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=match_limit=3000
 /(a+)*zz/
    aaaaaaaaaaaaaz\=recursion_limit=10
 /(*LIMIT_MATCH=3000)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=match_limit=60000
 /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
    aaaaaaaaaaaaaz
 /(*LIMIT_MATCH=60000)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=match_limit=3000
 /(*LIMIT_RECURSION=10)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=recursion_limit=1000
 /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
    aaaaaaaaaaaaaz
 /(*LIMIT_RECURSION=1000)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=recursion_limit=10
 # These three have infinitely nested recursions. 
 /((?2))((?1))/
    abc
 /((?(R2)a+|(?1)b))/
    aaaabcde
 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
 # The allusedtext modifier does not work with JIT, which does not maintain
 # the leftchar/rightchar data.
 /abc(?=xyz)/allusedtext
    abcxyzpqr
    abcxyzpqr\=aftertext
 /(?<=pqr)abc(?=xyz)/allusedtext
    xyzpqrabcxyzpqr
    xyzpqrabcxyzpqr\=aftertext
 /a\b/
    a.\=allusedtext
    a\=allusedtext  
 /abc\Kxyz/
    abcxyz\=allusedtext
 /abc(?=xyz(*ACCEPT))/
    abcxyz\=allusedtext
 /abc(?=abcde)(?=ab)/allusedtext
    abcabcdefg
 # These tests provoke recursion loops, which give a different error message
 # when JIT is used.
 /(?R)/I
    abcd
 /(a|(?R))/I
    abcd
    defg 
 /(ab|(bc|(de|(?R))))/I
    abcd
    fghi 
 /(ab|(bc|(de|(?1))))/I
    abcd
    fghi 
 /x(ab|(bc|(de|(?1)x)x)x)/I
    xab123
    xfghi 
 /(?!\w)(?R)/
    abcd
    =abc 
 /(?=\w)(?R)/
    =abc 
    abcd
 /(?<!\w)(?R)/
    abcd
 /(?<=\w)(?R)/
    abcd
 /(a+|(?R)b)/
    aaa
    bbb 
 /[^\xff]((?1))/BI
    abcd
 # End of testinput14
--- a/testdata/testinput15
+++ b/testdata/testinput15
@ -1,9 +1,155 @@
-# This test is run only when JIT support is not available. It checks that an
+# These are:
-# attempt to use it has the expected behaviour. It also tests things that
+#
-# are different without JIT.
+# (1) Tests of the match-limiting features. The results are different for
 # interpretive or JIT matching, so this test should not be run with JIT. The
 # same tests are run using JIT in test 17.
-/abc/I,jit,jitverify
+# (2) Other tests that must not be run with JIT.
-/a*/I
+/(a+)*zz/I
  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
  aaaaaaaaaaaaaz\=find_limits
 !((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
   /* this is a C style comment */\=find_limits
 /^(?>a)++/
    aa\=find_limits
    aaaaaaaaa\=find_limits
 /(a)(?1)++/
    aa\=find_limits
    aaaaaaaaa\=find_limits
 /a(?:.)*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 /a(?:.(*THEN))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 /a(?:.(*THEN:ABC))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
     aabbccddee\=find_limits
 /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
     aabbccddee\=find_limits
 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
     aabbccddee\=find_limits
 /(*LIMIT_MATCH=12bc)abc/
 /(*LIMIT_MATCH=4294967290)abc/
 /(*LIMIT_RECURSION=4294967280)abc/I
 /(a+)*zz/
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=match_limit=3000
 /(a+)*zz/
    aaaaaaaaaaaaaz\=recursion_limit=10
 /(*LIMIT_MATCH=3000)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=match_limit=60000
 /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
    aaaaaaaaaaaaaz
 /(*LIMIT_MATCH=60000)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=match_limit=3000
 /(*LIMIT_RECURSION=10)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=recursion_limit=1000
 /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
    aaaaaaaaaaaaaz
 /(*LIMIT_RECURSION=1000)(a+)*zz/I
    aaaaaaaaaaaaaz
    aaaaaaaaaaaaaz\=recursion_limit=10
 # These three have infinitely nested recursions. 
 /((?2))((?1))/
    abc
 /((?(R2)a+|(?1)b))/
    aaaabcde
 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
 # The allusedtext modifier does not work with JIT, which does not maintain
 # the leftchar/rightchar data.
 /abc(?=xyz)/allusedtext
    abcxyzpqr
    abcxyzpqr\=aftertext
 /(?<=pqr)abc(?=xyz)/allusedtext
    xyzpqrabcxyzpqr
    xyzpqrabcxyzpqr\=aftertext
 /a\b/
    a.\=allusedtext
    a\=allusedtext  
 /abc\Kxyz/
    abcxyz\=allusedtext
 /abc(?=xyz(*ACCEPT))/
    abcxyz\=allusedtext
 /abc(?=abcde)(?=ab)/allusedtext
    abcabcdefg
 # These tests provoke recursion loops, which give a different error message
 # when JIT is used.
 /(?R)/I
    abcd
 /(a|(?R))/I
    abcd
    defg 
 /(ab|(bc|(de|(?R))))/I
    abcd
    fghi 
 /(ab|(bc|(de|(?1))))/I
    abcd
    fghi 
 /x(ab|(bc|(de|(?1)x)x)x)/I
    xab123
    xfghi 
 /(?!\w)(?R)/
    abcd
    =abc 
 /(?=\w)(?R)/
    =abc 
    abcd
 /(?<!\w)(?R)/
    abcd
 /(?<=\w)(?R)/
    abcd
 /(a+|(?R)b)/
    aaa
    bbb 
 /[^\xff]((?1))/BI
    abcd
 # End of testinput15
--- a/testdata/testinput16
+++ b/testdata/testinput16
--- a/testdata/testinput17
+++ b/testdata/testinput17
--- a/testdata/testinput18
+++ b/testdata/testinput18
@ -1,17 +1,95 @@
 # This set of tests is run only with the 8-bit library. It tests the POSIX
-# interface with UTF/UCP support, which is supported only with the 8-bit
+# interface, which is supported only with the 8-bit library. This test should
-# library. This test should not be run with JIT (which is not available for the
+# not be run with JIT (which is not available for the POSIX interface).
 # POSIX interface).
 #forbid_utf
 #pattern posix
-/a\x{1234}b/utf
+# Test invalid options
    a\x{1234}b
-/\w/
+/abc/auto_callout
    +++\x{c2}
-/\w/ucp
+/abc/
-    +++\x{c2}
+   abc\=find_limits
-# End of testdata/testinput17
+/abc/
  abc\=partial_hard
 # Real tests
 /abc/
    abc
    *** Failers
 /^abc|def/
    abcdef
    abcdef\=notbol
 /.*((abc)$|(def))/
    defabc
    defabc\=noteol
 /the quick brown fox/
    the quick brown fox
    *** Failers
    The Quick Brown Fox
 /the quick brown fox/i
    the quick brown fox
    The Quick Brown Fox
 /abc.def/
    *** Failers
    abc\ndef
 /abc$/
    abc
    abc\n
 /(abc)\2/
 /(abc\1)/
    abc
 /a*(b+)(z)(z)/
    aaaabbbbzzzz
    aaaabbbbzzzz\=ovector=0
    aaaabbbbzzzz\=ovector=1
    aaaabbbbzzzz\=ovector=2
 /ab.cd/
    ab-cd
    ab=cd
    ** Failers
    ab\ncd
 /ab.cd/s
    ab-cd
    ab=cd
    ab\ncd
 /a(b)c/no_auto_capture
    abc
 /a(?P<name>b)c/no_auto_capture
    abc
 /a?|b?/
    abc
    ** Failers
    ddd\=notempty
 /\w+A/
   CDAAAAB
 /\w+A/ungreedy
   CDAAAAB
 /\Biss\B/I,aftertext
    Mississippi
 /abc/\
 "(?(?C)"
 # End of testdata/testinput18
--- a/testdata/testinput19
+++ b/testdata/testinput19
@ -1,62 +1,17 @@
-# This set of tests exercises the serialization/deserialization functions in
+# This set of tests is run only with the 8-bit library. It tests the POSIX
-# the library. It does not use UTF or JIT.
+# interface with UTF/UCP support, which is supported only with the 8-bit
 # library. This test should not be run with JIT (which is not available for the
 # POSIX interface).
-#forbid_utf
+#pattern posix
-# Compile several patterns, push them onto the stack, and then write them
+/a\x{1234}b/utf
-# all to a file.
+    a\x{1234}b
-#pattern push
+/\w/
    +++\x{c2}
-/(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
+/\w/ucp
-  (?(DEFINE)
+    +++\x{c2}
  (?<NAME_PAT>[a-z]+)
  (?<ADDRESS_PAT>\d+)
  )/x
 /^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
-#save testsaved1
+# End of testdata/testinput19
 # Do it again for some more patterns.
 /(*MARK:A)(*SKIP:B)(C|X)/mark
 /(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
 #save testsaved2
 #pattern -push
 # Reload the patterns, then pop them one by one and check them.
 #load testsaved1
 #load testsaved2
 #pop info
    foofoo             
    barbar
 #pop mark
    C
    D 
 #pop
    AmanaplanacanalPanama   
 #pop info
    metcalfe 33
 # Check for an error when different tables are used.
 /abc/push,tables=1
 /xyz/push,tables=2
 #save testsaved1
 #pop
    xyz
 #pop
    abc
 #pop should give an error
    pqr
 # End of testinput19 
--- a/testdata/testinput20
+++ b/testdata/testinput20
@ -0,0 +1,62 @@
 # This set of tests exercises the serialization/deserialization functions in
 # the library. It does not use UTF or JIT.
 #forbid_utf
 # Compile several patterns, push them onto the stack, and then write them
 # all to a file.
 #pattern push
 /(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
  (?(DEFINE)
  (?<NAME_PAT>[a-z]+)
  (?<ADDRESS_PAT>\d+)
  )/x
 /^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
 #save testsaved1
 # Do it again for some more patterns.
 /(*MARK:A)(*SKIP:B)(C|X)/mark
 /(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
 #save testsaved2
 #pattern -push
 # Reload the patterns, then pop them one by one and check them.
 #load testsaved1
 #load testsaved2
 #pop info
    foofoo             
    barbar
 #pop mark
    C
    D 
 #pop
    AmanaplanacanalPanama   
 #pop info
    metcalfe 33
 # Check for an error when different tables are used.
 /abc/push,tables=1
 /xyz/push,tables=2
 #save testsaved1
 #pop
    xyz
 #pop
    abc
 #pop should give an error
    pqr
 # End of testinput20
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@ -236,6 +236,54 @@ No match
    \xfd\x83\x80\x80\x80\x80\=no_utf_check
 No match
 # Similar tests with offsets
 /badutf/utf
    X\xdfabcd
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=1
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=2
 No match
 /(?<=x)badutf/utf
    X\xdfabcd
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=1
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=2
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=3
 No match
    X\xdfabcd\xdf\=offset=3
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
 /(?<=xx)badutf/utf
    X\xdfabcd
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=1
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=2
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=3
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
 /(?<=xxxx)badutf/utf
    X\xdfabcd
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=1
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=2
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=3
 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    X\xdfabcd\=offset=6
 No match
    X\xdfabc\xdf\=offset=6
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
    X\xdfabc\xdf\=offset=7
 Failed: error -33: bad offset value
 /\x{100}/IB,utf
 ------------------------------------------------------------------
        Bra
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@ -609,6 +609,8 @@ Failed: error 106 at offset 13: missing terminating ] for character class
 /X/utf
    XX\x{d800}
 Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
    XX\x{d800}\=offset=3
 No match
    XX\x{d800}\=no_utf_check
 0: X
    XX\x{da00}
@ -632,6 +634,10 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
    XX\x{d800}\x{1234}
 Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
 /(?<=.)X/utf
    XX\x{d800}\=offset=3
 Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
 /(*UTF16)\x{11234}/
  abcd\x{11234}pqr
 0: \x{11234}
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@ -602,6 +602,8 @@ Failed: error 106 at offset 13: missing terminating ] for character class
 /X/utf
    XX\x{d800}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{d800}\=offset=3
 No match
    XX\x{d800}\=no_utf_check
 0: X
    XX\x{da00}
@ -625,6 +627,10 @@ Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defin
    XX\x{d800}\x{1234}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
 /(?<=.)X/utf
    XX\x{d800}\=offset=3
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
 /(*UTF16)\x{11234}/
 Failed: error 160 at offset 5: (*VERB) not recognized or malformed
  abcd\x{11234}pqr
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@ -1,334 +0,0 @@
 # These are:
 #
 # (1) Tests of the match-limiting features. The results are different for
 # interpretive or JIT matching, so this test should not be run with JIT. The
 # same tests are run using JIT in test 16.
 # (2) Other tests that must not be run with JIT.
 /(a+)*zz/I
 Capturing subpattern count = 1
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
 Minimum match limit = 8
 Minimum recursion limit = 6
 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
  aaaaaaaaaaaaaz\=find_limits
 Minimum match limit = 32768
 Minimum recursion limit = 29
 No match
 !((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
 Capturing subpattern count = 1
 May match empty string
 Subject length lower bound = 0
   /* this is a C style comment */\=find_limits
 Minimum match limit = 120
 Minimum recursion limit = 6
 0: /* this is a C style comment */
 1: /* this is a C style comment */
 /^(?>a)++/
    aa\=find_limits
 Minimum match limit = 5
 Minimum recursion limit = 2
 0: aa
    aaaaaaaaa\=find_limits
 Minimum match limit = 12
 Minimum recursion limit = 2
 0: aaaaaaaaa
 /(a)(?1)++/
    aa\=find_limits
 Minimum match limit = 7
 Minimum recursion limit = 4
 0: aa
 1: a
    aaaaaaaaa\=find_limits
 Minimum match limit = 21
 Minimum recursion limit = 4
 0: aaaaaaaaa
 1: a
 /a(?:.)*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 Minimum match limit = 65
 Minimum recursion limit = 2
 0: abbbbbbbbbbbbbbbbbbbbba
 /a(?:.(*THEN))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 Minimum match limit = 86
 Minimum recursion limit = 45
 0: abbbbbbbbbbbbbbbbbbbbba
 /a(?:.(*THEN:ABC))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 Minimum match limit = 86
 Minimum recursion limit = 45
 0: abbbbbbbbbbbbbbbbbbbbba
 /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
     aabbccddee\=find_limits
 Minimum match limit = 7
 Minimum recursion limit = 2
 0: aabbccddee
 /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
     aabbccddee\=find_limits
 Minimum match limit = 17
 Minimum recursion limit = 16
 0: aabbccddee
 1: aa
 2: bb
 3: cc
 4: dd
 5: ee
 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
     aabbccddee\=find_limits
 Minimum match limit = 13
 Minimum recursion limit = 10
 0: aabbccddee
 1: aa
 2: cc
 3: ee
 /(*LIMIT_MATCH=12bc)abc/
 Failed: error 160 at offset 17: (*VERB) not recognized or malformed
 /(*LIMIT_MATCH=4294967290)abc/
 Failed: error 160 at offset 24: (*VERB) not recognized or malformed
 /(*LIMIT_RECURSION=4294967280)abc/I
 Capturing subpattern count = 0
 Recursion limit = 4294967280
 First code unit = 'a'
 Last code unit = 'c'
 Subject length lower bound = 3
 /(a+)*zz/
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=match_limit=3000
 Failed: error -47: match limit exceeded
 /(a+)*zz/
    aaaaaaaaaaaaaz\=recursion_limit=10
 Failed: error -53: recursion limit exceeded
 /(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
 Match limit = 3000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 Failed: error -47: match limit exceeded
    aaaaaaaaaaaaaz\=match_limit=60000
 Failed: error -47: match limit exceeded
 /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
 Match limit = 3000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 Failed: error -47: match limit exceeded
 /(*LIMIT_MATCH=60000)(a+)*zz/I
 Capturing subpattern count = 1
 Match limit = 60000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=match_limit=3000
 Failed: error -47: match limit exceeded
 /(*LIMIT_RECURSION=10)(a+)*zz/I
 Capturing subpattern count = 1
 Recursion limit = 10
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 Failed: error -53: recursion limit exceeded
    aaaaaaaaaaaaaz\=recursion_limit=1000
 Failed: error -53: recursion limit exceeded
 /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
 Capturing subpattern count = 1
 Recursion limit = 1000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
 /(*LIMIT_RECURSION=1000)(a+)*zz/I
 Capturing subpattern count = 1
 Recursion limit = 1000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=recursion_limit=10
 Failed: error -53: recursion limit exceeded
 # These three have infinitely nested recursions. 
 /((?2))((?1))/
    abc
 Failed: error -52: nested recursion at the same subject position
 /((?(R2)a+|(?1)b))/
    aaaabcde
 Failed: error -52: nested recursion at the same subject position
 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
 Failed: error -52: nested recursion at the same subject position
 # The allusedtext modifier does not work with JIT, which does not maintain
 # the leftchar/rightchar data.
 /abc(?=xyz)/allusedtext
    abcxyzpqr
 0: abcxyz
       >>>
    abcxyzpqr\=aftertext
 0: abcxyz
       >>>
 0+ xyzpqr
 /(?<=pqr)abc(?=xyz)/allusedtext
    xyzpqrabcxyzpqr
 0: pqrabcxyz
    <<<   >>>
    xyzpqrabcxyzpqr\=aftertext
 0: pqrabcxyz
    <<<   >>>
 0+ xyzpqr
 /a\b/
    a.\=allusedtext
 0: a.
     >
    a\=allusedtext  
 0: a
 /abc\Kxyz/
    abcxyz\=allusedtext
 0: abcxyz
    <<<   
 /abc(?=xyz(*ACCEPT))/
    abcxyz\=allusedtext
 0: abcxyz
       >>>
 /abc(?=abcde)(?=ab)/allusedtext
    abcabcdefg
 0: abcabcde
       >>>>>
 # These tests provoke recursion loops, which give a different error message
 # when JIT is used.
 /(?R)/I
 Capturing subpattern count = 0
 May match empty string
 Subject length lower bound = 0
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(a|(?R))/I
 Capturing subpattern count = 1
 May match empty string
 Subject length lower bound = 1
    abcd
 0: a
 1: a
    defg 
 Failed: error -52: nested recursion at the same subject position
 /(ab|(bc|(de|(?R))))/I
 Capturing subpattern count = 3
 May match empty string
 Subject length lower bound = 2
    abcd
 0: ab
 1: ab
    fghi 
 Failed: error -52: nested recursion at the same subject position
 /(ab|(bc|(de|(?1))))/I
 Capturing subpattern count = 3
 May match empty string
 Subject length lower bound = 2
    abcd
 0: ab
 1: ab
    fghi 
 Failed: error -52: nested recursion at the same subject position
 /x(ab|(bc|(de|(?1)x)x)x)/I
 Capturing subpattern count = 3
 First code unit = 'x'
 Subject length lower bound = 3
    xab123
 0: xab
 1: ab
    xfghi 
 Failed: error -52: nested recursion at the same subject position
 /(?!\w)(?R)/
    abcd
 Failed: error -52: nested recursion at the same subject position
    =abc 
 Failed: error -52: nested recursion at the same subject position
 /(?=\w)(?R)/
    =abc 
 Failed: error -52: nested recursion at the same subject position
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(?<!\w)(?R)/
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(?<=\w)(?R)/
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(a+|(?R)b)/
    aaa
 0: aaa
 1: aaa
    bbb 
 Failed: error -52: nested recursion at the same subject position
 /[^\xff]((?1))/BI
 ------------------------------------------------------------------
        Bra
        [^\x{ff}]
        CBra 1
        Recurse
        Ket
        Ket
        End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 Subject length lower bound = 1
    abcd
 Failed: error -52: nested recursion at the same subject position
 # End of testinput14
--- a/testdata/testoutput14-16
+++ b/testdata/testoutput14-16
@ -0,0 +1,61 @@
 # These test special (mostly error) UTF features of DFA matching. They are a 
 # selection of the more comprehensive tests that are run for non-DFA matching.
 # The output is different for the different widths.
 #subject dfa
 /X/utf
    XX\x{d800}
 Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
    XX\x{d800}\=offset=3
 No match
    XX\x{d800}\=no_utf_check
 0: X
    XX\x{da00}
 Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
    XX\x{da00}\=no_utf_check
 0: X
    XX\x{dc00}
 Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
    XX\x{dc00}\=no_utf_check
 0: X
    XX\x{de00}
 Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
    XX\x{de00}\=no_utf_check
 0: X
    XX\x{dfff}
 Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
    XX\x{dfff}\=no_utf_check
 0: X
    XX\x{110000}
 ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
    XX\x{d800}\x{1234}
 Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
 /badutf/utf
    X\xdf
 No match
    XX\xef
 No match
    XXX\xef\x80
 No match
    X\xf7
 No match
    XX\xf7\x80
 No match
    XXX\xf7\x80\x80
 No match
 /shortutf/utf
    XX\xdf\=ph
 No match
    XX\xef\=ph
 No match
    XX\xef\x80\=ph
 No match
    \xf7\=ph
 No match
    \xf7\x80\=ph
 No match
 # End of testinput14
--- a/testdata/testoutput14-32
+++ b/testdata/testoutput14-32
@ -0,0 +1,61 @@
 # These test special (mostly error) UTF features of DFA matching. They are a 
 # selection of the more comprehensive tests that are run for non-DFA matching.
 # The output is different for the different widths.
 #subject dfa
 /X/utf
    XX\x{d800}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{d800}\=offset=3
 No match
    XX\x{d800}\=no_utf_check
 0: X
    XX\x{da00}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{da00}\=no_utf_check
 0: X
    XX\x{dc00}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{dc00}\=no_utf_check
 0: X
    XX\x{de00}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{de00}\=no_utf_check
 0: X
    XX\x{dfff}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{dfff}\=no_utf_check
 0: X
    XX\x{110000}
 Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
    XX\x{d800}\x{1234}
 Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
 /badutf/utf
    X\xdf
 No match
    XX\xef
 No match
    XXX\xef\x80
 No match
    X\xf7
 No match
    XX\xf7\x80
 No match
    XXX\xf7\x80\x80
 No match
 /shortutf/utf
    XX\xdf\=ph
 No match
    XX\xef\=ph
 No match
    XX\xef\x80\=ph
 No match
    \xf7\=ph
 No match
    \xf7\x80\=ph
 No match
 # End of testinput14
--- a/testdata/testoutput14-8
+++ b/testdata/testoutput14-8
@ -0,0 +1,61 @@
 # These test special (mostly error) UTF features of DFA matching. They are a 
 # selection of the more comprehensive tests that are run for non-DFA matching.
 # The output is different for the different widths.
 #subject dfa
 /X/utf
    XX\x{d800}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{d800}\=offset=3
 Error -36 (bad UTF-8 offset)
    XX\x{d800}\=no_utf_check
 0: X
    XX\x{da00}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{da00}\=no_utf_check
 0: X
    XX\x{dc00}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{dc00}\=no_utf_check
 0: X
    XX\x{de00}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{de00}\=no_utf_check
 0: X
    XX\x{dfff}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
    XX\x{dfff}\=no_utf_check
 0: X
    XX\x{110000}
 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
    XX\x{d800}\x{1234}
 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
 /badutf/utf
    X\xdf
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
    XX\xef
 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
    XXX\xef\x80
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
    X\xf7
 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
    XX\xf7\x80
 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
    XXX\xf7\x80\x80
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
 /shortutf/utf
    XX\xdf\=ph
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
    XX\xef\=ph
 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
    XX\xef\x80\=ph
 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
    \xf7\=ph
 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
    \xf7\x80\=ph
 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
 # End of testinput14
--- a/testdata/testoutput15
+++ b/testdata/testoutput15
@ -1,17 +1,334 @@
-# This test is run only when JIT support is not available. It checks that an
+# These are:
-# attempt to use it has the expected behaviour. It also tests things that
+#
-# are different without JIT.
+# (1) Tests of the match-limiting features. The results are different for
 # interpretive or JIT matching, so this test should not be run with JIT. The
 # same tests are run using JIT in test 17.
-/abc/I,jit,jitverify
+# (2) Other tests that must not be run with JIT.
 /(a+)*zz/I
 Capturing subpattern count = 1
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
 Minimum match limit = 8
 Minimum recursion limit = 6
 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
  aaaaaaaaaaaaaz\=find_limits
 Minimum match limit = 32768
 Minimum recursion limit = 29
 No match
 !((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
 Capturing subpattern count = 1
 May match empty string
 Subject length lower bound = 0
   /* this is a C style comment */\=find_limits
 Minimum match limit = 120
 Minimum recursion limit = 6
 0: /* this is a C style comment */
 1: /* this is a C style comment */
 /^(?>a)++/
    aa\=find_limits
 Minimum match limit = 5
 Minimum recursion limit = 2
 0: aa
    aaaaaaaaa\=find_limits
 Minimum match limit = 12
 Minimum recursion limit = 2
 0: aaaaaaaaa
 /(a)(?1)++/
    aa\=find_limits
 Minimum match limit = 7
 Minimum recursion limit = 4
 0: aa
 1: a
    aaaaaaaaa\=find_limits
 Minimum match limit = 21
 Minimum recursion limit = 4
 0: aaaaaaaaa
 1: a
 /a(?:.)*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 Minimum match limit = 65
 Minimum recursion limit = 2
 0: abbbbbbbbbbbbbbbbbbbbba
 /a(?:.(*THEN))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 Minimum match limit = 86
 Minimum recursion limit = 45
 0: abbbbbbbbbbbbbbbbbbbbba
 /a(?:.(*THEN:ABC))*?a/ims
    abbbbbbbbbbbbbbbbbbbbba\=find_limits
 Minimum match limit = 86
 Minimum recursion limit = 45
 0: abbbbbbbbbbbbbbbbbbbbba
 /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
     aabbccddee\=find_limits
 Minimum match limit = 7
 Minimum recursion limit = 2
 0: aabbccddee
 /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
     aabbccddee\=find_limits
 Minimum match limit = 17
 Minimum recursion limit = 16
 0: aabbccddee
 1: aa
 2: bb
 3: cc
 4: dd
 5: ee
 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
     aabbccddee\=find_limits
 Minimum match limit = 13
 Minimum recursion limit = 10
 0: aabbccddee
 1: aa
 2: cc
 3: ee
 /(*LIMIT_MATCH=12bc)abc/
 Failed: error 160 at offset 17: (*VERB) not recognized or malformed
 /(*LIMIT_MATCH=4294967290)abc/
 Failed: error 160 at offset 24: (*VERB) not recognized or malformed
 /(*LIMIT_RECURSION=4294967280)abc/I
 Capturing subpattern count = 0
 Recursion limit = 4294967280
 First code unit = 'a'
 Last code unit = 'c'
 Subject length lower bound = 3
 JIT support is not available in this version of PCRE2
-/a*/I
+/(a+)*zz/
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=match_limit=3000
 Failed: error -47: match limit exceeded
 /(a+)*zz/
    aaaaaaaaaaaaaz\=recursion_limit=10
 Failed: error -53: recursion limit exceeded
 /(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
 Match limit = 3000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 Failed: error -47: match limit exceeded
    aaaaaaaaaaaaaz\=match_limit=60000
 Failed: error -47: match limit exceeded
 /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
 Match limit = 3000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 Failed: error -47: match limit exceeded
 /(*LIMIT_MATCH=60000)(a+)*zz/I
 Capturing subpattern count = 1
 Match limit = 60000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=match_limit=3000
 Failed: error -47: match limit exceeded
 /(*LIMIT_RECURSION=10)(a+)*zz/I
 Capturing subpattern count = 1
 Recursion limit = 10
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 Failed: error -53: recursion limit exceeded
    aaaaaaaaaaaaaz\=recursion_limit=1000
 Failed: error -53: recursion limit exceeded
 /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
 Capturing subpattern count = 1
 Recursion limit = 1000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
 /(*LIMIT_RECURSION=1000)(a+)*zz/I
 Capturing subpattern count = 1
 Recursion limit = 1000
 Starting code units: a z 
 Last code unit = 'z'
 Subject length lower bound = 2
    aaaaaaaaaaaaaz
 No match
    aaaaaaaaaaaaaz\=recursion_limit=10
 Failed: error -53: recursion limit exceeded
 # These three have infinitely nested recursions. 
 /((?2))((?1))/
    abc
 Failed: error -52: nested recursion at the same subject position
 /((?(R2)a+|(?1)b))/
    aaaabcde
 Failed: error -52: nested recursion at the same subject position
 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
 Failed: error -52: nested recursion at the same subject position
 # The allusedtext modifier does not work with JIT, which does not maintain
 # the leftchar/rightchar data.
 /abc(?=xyz)/allusedtext
    abcxyzpqr
 0: abcxyz
       >>>
    abcxyzpqr\=aftertext
 0: abcxyz
       >>>
 0+ xyzpqr
 /(?<=pqr)abc(?=xyz)/allusedtext
    xyzpqrabcxyzpqr
 0: pqrabcxyz
    <<<   >>>
    xyzpqrabcxyzpqr\=aftertext
 0: pqrabcxyz
    <<<   >>>
 0+ xyzpqr
 /a\b/
    a.\=allusedtext
 0: a.
     >
    a\=allusedtext  
 0: a
 /abc\Kxyz/
    abcxyz\=allusedtext
 0: abcxyz
    <<<   
 /abc(?=xyz(*ACCEPT))/
    abcxyz\=allusedtext
 0: abcxyz
       >>>
 /abc(?=abcde)(?=ab)/allusedtext
    abcabcdefg
 0: abcabcde
       >>>>>
 # These tests provoke recursion loops, which give a different error message
 # when JIT is used.
 /(?R)/I
 Capturing subpattern count = 0
 May match empty string
 Subject length lower bound = 0
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(a|(?R))/I
 Capturing subpattern count = 1
 May match empty string
 Subject length lower bound = 1
    abcd
 0: a
 1: a
    defg 
 Failed: error -52: nested recursion at the same subject position
 /(ab|(bc|(de|(?R))))/I
 Capturing subpattern count = 3
 May match empty string
 Subject length lower bound = 2
    abcd
 0: ab
 1: ab
    fghi 
 Failed: error -52: nested recursion at the same subject position
 /(ab|(bc|(de|(?1))))/I
 Capturing subpattern count = 3
 May match empty string
 Subject length lower bound = 2
    abcd
 0: ab
 1: ab
    fghi 
 Failed: error -52: nested recursion at the same subject position
 /x(ab|(bc|(de|(?1)x)x)x)/I
 Capturing subpattern count = 3
 First code unit = 'x'
 Subject length lower bound = 3
    xab123
 0: xab
 1: ab
    xfghi 
 Failed: error -52: nested recursion at the same subject position
 /(?!\w)(?R)/
    abcd
 Failed: error -52: nested recursion at the same subject position
    =abc 
 Failed: error -52: nested recursion at the same subject position
 /(?=\w)(?R)/
    =abc 
 Failed: error -52: nested recursion at the same subject position
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(?<!\w)(?R)/
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(?<=\w)(?R)/
    abcd
 Failed: error -52: nested recursion at the same subject position
 /(a+|(?R)b)/
    aaa
 0: aaa
 1: aaa
    bbb 
 Failed: error -52: nested recursion at the same subject position
 /[^\xff]((?1))/BI
 ------------------------------------------------------------------
        Bra
        [^\x{ff}]
        CBra 1
        Recurse
        Ket
        Ket
        End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 Subject length lower bound = 1
    abcd
 Failed: error -52: nested recursion at the same subject position
 # End of testinput15
--- a/testdata/testoutput16
+++ b/testdata/testoutput16
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
--- a/testdata/testoutput18
+++ b/testdata/testoutput18
@ -1,20 +1,148 @@
 # This set of tests is run only with the 8-bit library. It tests the POSIX
-# interface with UTF/UCP support, which is supported only with the 8-bit
+# interface, which is supported only with the 8-bit library. This test should
-# library. This test should not be run with JIT (which is not available for the
+# not be run with JIT (which is not available for the POSIX interface).
 # POSIX interface).
 #forbid_utf
 #pattern posix
-/a\x{1234}b/utf
+# Test invalid options
    a\x{1234}b
 0: a\x{1234}b
-/\w/
+/abc/auto_callout
-    +++\x{c2}
+** Ignored with POSIX interface: auto_callout
 /abc/
   abc\=find_limits
 ** Ignored with POSIX interface: find_limits
 0: abc
 /abc/
  abc\=partial_hard
 ** Ignored with POSIX interface: partial_hard
 0: abc
 # Real tests
 /abc/
    abc
 0: abc
    *** Failers
 No match: POSIX code 17: match failed
-/\w/ucp
+/^abc|def/
-    +++\x{c2}
+    abcdef
- 0: \xc2
+ 0: abc
    abcdef\=notbol
 0: def
-# End of testdata/testinput17
+/.*((abc)$|(def))/
    defabc
 0: defabc
 1: abc
 2: abc
    defabc\=noteol
 0: def
 1: def
 3: def
 /the quick brown fox/
    the quick brown fox
 0: the quick brown fox
    *** Failers
 No match: POSIX code 17: match failed
    The Quick Brown Fox
 No match: POSIX code 17: match failed
 /the quick brown fox/i
    the quick brown fox
 0: the quick brown fox
    The Quick Brown Fox
 0: The Quick Brown Fox
 /abc.def/
    *** Failers
 No match: POSIX code 17: match failed
    abc\ndef
 No match: POSIX code 17: match failed
 /abc$/
    abc
 0: abc
    abc\n
 0: abc
 /(abc)\2/
 Failed: POSIX code 15: bad back reference at offset 6     
 /(abc\1)/
    abc
 No match: POSIX code 17: match failed
 /a*(b+)(z)(z)/
    aaaabbbbzzzz
 0: aaaabbbbzz
 1: bbbb
 2: z
 3: z
    aaaabbbbzzzz\=ovector=0
 Matched without capture
    aaaabbbbzzzz\=ovector=1
 0: aaaabbbbzz
    aaaabbbbzzzz\=ovector=2
 0: aaaabbbbzz
 1: bbbb
 /ab.cd/
    ab-cd
 0: ab-cd
    ab=cd
 0: ab=cd
    ** Failers
 No match: POSIX code 17: match failed
    ab\ncd
 No match: POSIX code 17: match failed
 /ab.cd/s
    ab-cd
 0: ab-cd
    ab=cd
 0: ab=cd
    ab\ncd
 0: ab\x0acd
 /a(b)c/no_auto_capture
    abc
 Matched with REG_NOSUB
 /a(?P<name>b)c/no_auto_capture
    abc
 Matched with REG_NOSUB
 /a?|b?/
    abc
 0: a
    ** Failers
 0: 
    ddd\=notempty
 No match: POSIX code 17: match failed
 /\w+A/
   CDAAAAB
 0: CDAAAA
 /\w+A/ungreedy
   CDAAAAB
 0: CDA
 /\Biss\B/I,aftertext
 ** Ignored with POSIX interface: info
    Mississippi
 0: iss
 0+ issippi
 /abc/\
 Failed: POSIX code 9: bad escape sequence at offset 4     
 "(?(?C)"
 Failed: POSIX code 3: pattern error at offset 2     
 # End of testdata/testinput18
--- a/testdata/testoutput19
+++ b/testdata/testoutput19
@ -1,100 +1,20 @@
-# This set of tests exercises the serialization/deserialization functions in
+# This set of tests is run only with the 8-bit library. It tests the POSIX
-# the library. It does not use UTF or JIT.
+# interface with UTF/UCP support, which is supported only with the 8-bit
 # library. This test should not be run with JIT (which is not available for the
 # POSIX interface).
-#forbid_utf
+#pattern posix
-# Compile several patterns, push them onto the stack, and then write them
+/a\x{1234}b/utf
-# all to a file.
+    a\x{1234}b
 0: a\x{1234}b
-#pattern push
+/\w/
    +++\x{c2}
 No match: POSIX code 17: match failed
-/(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
+/\w/ucp
-  (?(DEFINE)
+    +++\x{c2}
-  (?<NAME_PAT>[a-z]+)
+ 0: \xc2
  (?<ADDRESS_PAT>\d+)
  )/x
 /^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
-#save testsaved1
+# End of testdata/testinput19
 # Do it again for some more patterns.
 /(*MARK:A)(*SKIP:B)(C|X)/mark
 ** Ignored when compiled pattern is stacked with 'push': mark
 /(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
 #save testsaved2
 #pattern -push
 # Reload the patterns, then pop them one by one and check them.
 #load testsaved1
 #load testsaved2
 #pop info
 Capturing subpattern count = 2
 Max back reference = 2
 Named capturing subpatterns:
  n   1
  n   2
 Options: dupnames
 Starting code units: b f 
 Subject length lower bound = 6
    foofoo             
 0: foofoo
 1: foo
    barbar
 0: barbar
 1: <unset>
 2: bar
 #pop mark
    C
 0: C
 1: C
 MK: A
    D 
 No match, mark = A
 #pop
    AmanaplanacanalPanama   
 0: AmanaplanacanalPanama
 1: <unset>
 2: <unset>
 3: AmanaplanacanalPanama
 4: A
 #pop info
 Capturing subpattern count = 4
 Named capturing subpatterns:
  ADDR          2
  ADDRESS_PAT   4
  NAME          1
  NAME_PAT      3
 Options: extended
 Subject length lower bound = 3
    metcalfe 33
 0: metcalfe 33
 1: metcalfe
 2: 33
 # Check for an error when different tables are used.
 /abc/push,tables=1
 /xyz/push,tables=2
 #save testsaved1
 Serialization failed: error -30: patterns do not all use the same character tables
 #pop
    xyz
 0: xyz
 #pop
    abc
 0: abc
 #pop should give an error
 ** Can't pop off an empty stack
    pqr
 # End of testinput19 
--- a/testdata/testoutput20
+++ b/testdata/testoutput20
@ -0,0 +1,100 @@
 # This set of tests exercises the serialization/deserialization functions in
 # the library. It does not use UTF or JIT.
 #forbid_utf
 # Compile several patterns, push them onto the stack, and then write them
 # all to a file.
 #pattern push
 /(?<NAME>(?&NAME_PAT))\s+(?<ADDR>(?&ADDRESS_PAT))
  (?(DEFINE)
  (?<NAME_PAT>[a-z]+)
  (?<ADDRESS_PAT>\d+)
  )/x
 /^(?:((.)(?1)\2|)|((.)(?3)\4|.))$/i
 #save testsaved1
 # Do it again for some more patterns.
 /(*MARK:A)(*SKIP:B)(C|X)/mark
 ** Ignored when compiled pattern is stacked with 'push': mark
 /(?:(?<n>foo)|(?<n>bar))\k<n>/dupnames
 #save testsaved2
 #pattern -push
 # Reload the patterns, then pop them one by one and check them.
 #load testsaved1
 #load testsaved2
 #pop info
 Capturing subpattern count = 2
 Max back reference = 2
 Named capturing subpatterns:
  n   1
  n   2
 Options: dupnames
 Starting code units: b f 
 Subject length lower bound = 6
    foofoo             
 0: foofoo
 1: foo
    barbar
 0: barbar
 1: <unset>
 2: bar
 #pop mark
    C
 0: C
 1: C
 MK: A
    D 
 No match, mark = A
 #pop
    AmanaplanacanalPanama   
 0: AmanaplanacanalPanama
 1: <unset>
 2: <unset>
 3: AmanaplanacanalPanama
 4: A
 #pop info
 Capturing subpattern count = 4
 Named capturing subpatterns:
  ADDR          2
  ADDRESS_PAT   4
  NAME          1
  NAME_PAT      3
 Options: extended
 Subject length lower bound = 3
    metcalfe 33
 0: metcalfe 33
 1: metcalfe
 2: 33
 # Check for an error when different tables are used.
 /abc/push,tables=1
 /xyz/push,tables=2
 #save testsaved1
 Serialization failed: error -30: patterns do not all use the same character tables
 #pop
    xyz
 0: xyz
 #pop
    abc
 0: abc
 #pop should give an error
 ** Can't pop off an empty stack
    pqr
 # End of testinput20