Add more tests.

2014-08-03 17:50:08 +00:00 · 2014-08-03 17:50:08 +00:00 · 8792477279
parent 2addfec25d
commit 8792477279
15 changed files with 24064 additions and 239 deletions
--- a/206
+++ b/206
@ -48,17 +48,16 @@
 # Define test titles in variables so that they can be output as a list. Some
 # of them are modified (e.g. with -8 or -16) when used in the actual tests.

-title1="Test 1: Main functionality (Compatible with Perl >= 5.10)"
+title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)"
 title2="Test 2: API, errors, internals, and non-Perl stuff"
 title3="Test 3: Locale-specific features"
 title4A="Test 4: UTF"
-title4B=" and Unicode property support (Compatible with Perl >= 5.10)"
-#title5="Test 5: API, internals, and non-Perl stuff for UTF"
-#title6="Test 6: Unicode property support (Compatible with Perl >= 5.10)"
-#title7="Test 7: API, internals, and non-Perl stuff for Unicode property support"
-#title8="Test 8: DFA matching main functionality"
-#title9="Test 9: DFA matching with UTF"
-#title10="Test 10: DFA matching with Unicode properties"
+title4B=" and Unicode property support (compatible with Perl >= 5.10)"
+title5A="Test 5: API, internals, and non-Perl stuff for UTF"
+title5B=" and UCP support"
+title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
+title7A="Test 7: DFA matching with UTF"
+title7B=" and Unicode property support"
 #title11="Test 11: Internal offsets and code size tests"
 #title12="Test 12: JIT-specific features (when JIT is available)"
 #title13="Test 13: JIT-specific features (when JIT is not available)"
@ -80,12 +79,12 @@ maxtest=2

 if [ $# -eq 1 -a "$1" = "list" ]; then
  echo $title1
-  echo $title2 "(not UTF)"
+  echo $title2 "(not UTF or UCP)"
  echo $title3
  echo $title4A $title4B
-#  echo $title5 support
-#  echo $title6
-#  echo $title7
+  echo $title5A $title5B
+  echo $title6
+  echo $title7A $title7B
 #  echo $title8
 #  echo $title9
 #  echo $title10
@ -176,9 +175,9 @@ do1=no
 do2=no
 do3=no
 do4=no
-#do5=no
-#do6=no
-#do7=no
+do5=no
+do6=no
+do7=no
 #do8=no
 #do9=no
 #do10=no
@ -205,9 +204,9 @@ while [ $# -gt 0 ] ; do
    2) do2=yes;;
    3) do3=yes;;
    4) do4=yes;;
-#    5) do5=yes;;
-#    6) do6=yes;;
-#    7) do7=yes;;
+    5) do5=yes;;
+    6) do6=yes;;
+    7) do7=yes;;
 #    8) do8=yes;;
 #    9) do9=yes;;
 #   10) do10=yes;;
@ -346,9 +345,10 @@ fi
 # If no specific tests were requested, select all. Those that are not
 # relevant will be automatically skipped.

-if [ $do1  = no -a $do2  = no -a $do3  = no -a $do4  = no \
+if [ $do1  = no -a $do2  = no -a $do3  = no -a $do4  = no -a \
+     $do5  = no -a $do6  = no -a $do7  = no \
   ]; then
-#     -a $do5  = no -a $do6  = no -a $do7  = no -a $do8  = no -a \
+#     -a $do8  = no -a \
 #     $do9  = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
 #     $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
 #     $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
@ -359,9 +359,9 @@ if [ $do1  = no -a $do2  = no -a $do3  = no -a $do4  = no \
  do2=yes
  do3=yes
  do4=yes
-#  do5=yes
-#  do6=yes
-#  do7=yes
+  do5=yes
+  do6=yes
+  do7=yes
 #  do8=yes
 #  do9=yes
 #  do10=yes
@ -425,7 +425,7 @@ fi
 # PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals

 if [ $do2 = yes ] ; then
-  echo $title2 "(not UTF-$bits)"
+  echo $title2 "(excluding UTF-$bits)"
  for opt in "" $jitopt; do
    $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry
    if [ $? = 0 ] ; then
@ -537,117 +537,53 @@ if [ $do4 = yes ] ; then
  fi
 fi

-#if [ $do5 = yes ] ; then
-#  echo ${title5}-${bits} support
-#  if [ $utf -eq 0 ] ; then
-#    echo "  Skipped because UTF-$bits support is not available"
-#  else
-#    for opt in "" "-s" $jitopt; do
-#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
-#      if [ $? = 0 ] ; then
-#        $cf $testdata/testoutput5 testtry
-#        if [ $? != 0 ] ; then exit 1; fi
-#      else exit 1
-#      fi
-#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
-#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
-#      else echo "  OK"
-#      fi
-#    done
-#  fi
-#fi
-#
-#if [ $do6 = yes ] ; then
-#  echo $title6
-#  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
-#    echo "  Skipped because Unicode property support is not available"
-#  else
-#    for opt in "" "-s" $jitopt; do
-#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput6 testtry
-#      if [ $? = 0 ] ; then
-#        $cf $testdata/testoutput6 testtry
-#        if [ $? != 0 ] ; then exit 1; fi
-#      else exit 1
-#      fi
-#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
-#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
-#      else echo "  OK"
-#      fi
-#    done
-#  fi
-#fi
-#
-## Test non-Perl-compatible Unicode property support
-#
-#if [ $do7 = yes ] ; then
-#  echo $title7
-#  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
-#    echo "  Skipped because Unicode property support is not available"
-#  else
-#    for opt in "" "-s" $jitopt; do
-#      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
-#      if [ $? = 0 ] ; then
-#        $cf $testdata/testoutput7 testtry
-#        if [ $? != 0 ] ; then exit 1; fi
-#      else exit 1
-#      fi
-#      if [ "$opt" = "-s" ] ; then echo "  OK with study"
-#      elif [ "$opt" = "-s+" ] ; then echo "  OK with JIT study"
-#      else echo "  OK"
-#      fi
-#    done
-#  fi
-#fi
-#
-## Tests for DFA matching support
-#
-#if [ $do8 = yes ] ; then
-#  echo $title8
-#  for opt in "" "-s"; do
-#    $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput8 testtry
-#    if [ $? = 0 ] ; then
-#      $cf $testdata/testoutput8 testtry
-#      if [ $? != 0 ] ; then exit 1; fi
-#    else exit 1
-#    fi
-#    if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
-#  done
-#fi
-#
-#if [ $do9 = yes ] ; then
-#  echo ${title9}-${bits}
-#  if [ $utf -eq 0 ] ; then
-#    echo "  Skipped because UTF-$bits support is not available"
-#  else
-#    for opt in "" "-s"; do
-#      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput9 testtry
-#      if [ $? = 0 ] ; then
-#        $cf $testdata/testoutput9 testtry
-#        if [ $? != 0 ] ; then exit 1; fi
-#      else exit 1
-#      fi
-#      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
-#    done
-#  fi
-#fi
-#
-#if [ $do10 = yes ] ; then
-#  echo $title10
-#  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
-#    echo "  Skipped because Unicode property support is not available"
-#  else
-#    for opt in "" "-s"; do
-#      $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput10 testtry
-#      if [ $? = 0 ] ; then
-#        $cf $testdata/testoutput10 testtry
-#        if [ $? != 0 ] ; then exit 1; fi
-#      else exit 1
-#      fi
-#      if [ "$opt" = "-s" ] ; then echo "  OK with study" ; else echo "  OK"; fi
-#    done
-#  fi
-#fi
-#
+# Test 5: API, internals, and non-Perl features for UTF. The input file is run
+# once with no extra option and then once for each word in $jitopt. Each run
+# writes its output to "testtry", which is then checked against the expected
+# output using $cf (presumably a file-compare command set earlier).
+
+if [ $do5 = yes ] ; then
+  echo ${title5A}-${bits}$title5B
+  if [ $utf -eq 0 ] ; then
+    echo "  Skipped because UTF-$bits support is not available"
+  else
+    for opt in "" $jitopt; do
+      $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
+      if [ $? = 0 ] ; then
+        $cf $testdata/testoutput5 testtry
+        if [ $? != 0 ] ; then exit 1; fi
+      else exit 1
+      fi
+      if [ "$opt" = "-jit" ] ; then echo "  OK with JIT"
+      else echo "  OK"
+      fi
+    done
+  fi
+fi
+
+# Tests for DFA matching support. These are run once only, with no per-run
+# option: there is no loop over $jitopt for tests 6 and 7.
+
+if [ $do6 = yes ] ; then
+  echo $title6
+  $sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
+  if [ $? = 0 ] ; then
+    $cf $testdata/testoutput6 testtry
+    if [ $? != 0 ] ; then exit 1; fi
+  else exit 1
+  fi
+  echo "  OK"
+fi
+
+if [ $do7 = yes ] ; then
+  echo ${title7A}-${bits}$title7B
+  if [ $utf -eq 0 ] ; then
+    echo "  Skipped because UTF-$bits support is not available"
+  else
+    # Do not pass $opt here: this test has no "for opt" loop, so $opt would
+    # only hold a stale value left behind by an earlier test's loop.
+    $sim $valgrind ./pcre2test -q $bmode $testdata/testinput7 testtry
+    if [ $? = 0 ] ; then
+      $cf $testdata/testoutput7 testtry
+      if [ $? != 0 ] ; then exit 1; fi
+    else exit 1
+    fi
+    echo "  OK"
+  fi
+fi
+
 ## Test of internal offsets and code sizes. This test is run only when there
 ## is Unicode property support and the link size is 2. The actual tests are
 ## mostly the same as in some of the above, but in this test we inspect some
--- a/src/pcre2.h
+++ b/src/pcre2.h
@ -123,19 +123,21 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_JIT_PARTIAL_SOFT    0x00000002
 #define PCRE2_JIT_PARTIAL_HARD    0x00000004

-/* These are for pcre2_match() and pcre2_dfa_match(). */
+/* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED,
+PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these
+functions, so take care not to define synonyms by mistake. */

-#define PCRE2_NOTBOL              0x00000001
-#define PCRE2_NOTEOL              0x00000002
-#define PCRE2_NOTEMPTY            0x00000004
-#define PCRE2_NOTEMPTY_ATSTART    0x00000008
-#define PCRE2_PARTIAL_SOFT        0x00000010
-#define PCRE2_PARTIAL_HARD        0x00000020
+#define PCRE2_NOTBOL              0x00000008
+#define PCRE2_NOTEOL              0x00000010
+#define PCRE2_NOTEMPTY            0x00000020
+#define PCRE2_NOTEMPTY_ATSTART    0x00000040
+#define PCRE2_PARTIAL_SOFT        0x00000080
+#define PCRE2_PARTIAL_HARD        0x00000100

 /* These are additional options for pcre2_dfa_match(). */

-#define PCRE2_DFA_RESTART         0x00000040
-#define PCRE2_DFA_SHORTEST        0x00000080
+#define PCRE2_DFA_RESTART         0x00000200
+#define PCRE2_DFA_SHORTEST        0x00000400

 /* Newline and \R settings, for use in the compile and match contexts. The
 newline values must be kept in step with values set in config.h and both sets
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@ -123,19 +123,21 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_JIT_PARTIAL_SOFT    0x00000002
 #define PCRE2_JIT_PARTIAL_HARD    0x00000004

-/* These are for pcre2_match() and pcre2_dfa_match(). */
+/* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED,
+PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these
+functions, so take care not to define synonyms by mistake. */

-#define PCRE2_NOTBOL              0x00000001
-#define PCRE2_NOTEOL              0x00000002
-#define PCRE2_NOTEMPTY            0x00000004
-#define PCRE2_NOTEMPTY_ATSTART    0x00000008
-#define PCRE2_PARTIAL_SOFT        0x00000010
-#define PCRE2_PARTIAL_HARD        0x00000020
+#define PCRE2_NOTBOL              0x00000008
+#define PCRE2_NOTEOL              0x00000010
+#define PCRE2_NOTEMPTY            0x00000020
+#define PCRE2_NOTEMPTY_ATSTART    0x00000040
+#define PCRE2_PARTIAL_SOFT        0x00000080
+#define PCRE2_PARTIAL_HARD        0x00000100

 /* These are additional options for pcre2_dfa_match(). */

-#define PCRE2_DFA_RESTART         0x00000040
-#define PCRE2_DFA_SHORTEST        0x00000080
+#define PCRE2_DFA_RESTART         0x00000200
+#define PCRE2_DFA_SHORTEST        0x00000400

 /* Newline and \R settings, for use in the compile and match contexts. The
 newline values must be kept in step with values set in config.h and both sets
--- a/src/pcre2_byte_order.c
+++ b/src/pcre2_byte_order.c
@ -107,14 +107,14 @@ return -1;

 REAL_PCRE *re = (REAL_PCRE *)argument_re;
 pcre_study_data *study;
-#ifndef COMPILE_PCRE8
+#if PCRE2_CODE_UNIT_WIDTH != 8
 pcre_uchar *ptr;
 int length;
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
 BOOL utf;
 BOOL utf16_char;
-#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
-#endif /* !COMPILE_PCRE8 */
+#endif
+#endif

 if (re == NULL) return PCRE_ERROR_NULL;
 if (re->magic_number == MAGIC_NUMBER)
@ -134,10 +134,10 @@ re->flags = swap_uint32(re->flags);
 re->limit_match = swap_uint32(re->limit_match);
 re->limit_recursion = swap_uint32(re->limit_recursion);

-#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
+#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
 re->first_char = swap_uint16(re->first_char);
 re->req_char = swap_uint16(re->req_char);
-#elif defined COMPILE_PCRE32
+#elif PCRE2_CODE_UNIT_WIDTH == 32
 re->first_char = swap_uint32(re->first_char);
 re->req_char = swap_uint32(re->req_char);
 #endif
@ -159,27 +159,27 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  study->minlength = swap_uint32(study->minlength);
  }

-#ifndef COMPILE_PCRE8
+#if PCRE2_CODE_UNIT_WIDTH != 8
 ptr = (pcre_uchar *)re + re->name_table_offset;
 length = re->name_count * re->name_entry_size;
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
 utf = (re->options & PCRE_UTF16) != 0;
 utf16_char = FALSE;
-#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
+#endif

 while(TRUE)
  {
  /* Swap previous characters. */
  while (length-- > 0)
    {
-#if defined COMPILE_PCRE16
+#if PCRE2_CODE_UNIT_WIDTH == 16
    *ptr = swap_uint16(*ptr);
-#elif defined COMPILE_PCRE32
+#elif PCRE2_CODE_UNIT_WIDTH == 32
    *ptr = swap_uint32(*ptr);
 #endif
    ptr++;
    }
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
  if (utf16_char)
    {
    if (HAS_EXTRALEN(ptr[-1]))
@ -194,9 +194,9 @@ while(TRUE)

  /* Get next opcode. */
  length = 0;
-#if defined COMPILE_PCRE16
+#if PCRE2_CODE_UNIT_WIDTH == 16
  *ptr = swap_uint16(*ptr);
-#elif defined COMPILE_PCRE32
+#elif PCRE2_CODE_UNIT_WIDTH == 32
  *ptr = swap_uint32(*ptr);
 #endif
  switch (*ptr)
@ -204,7 +204,7 @@ while(TRUE)
    case OP_END:
    return 0;

-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
@ -279,12 +279,12 @@ while(TRUE)
    case OP_XCLASS:
    /* Reverse the size of the XCLASS instance. */
    ptr++;
-#if defined COMPILE_PCRE16
+#if PCRE2_CODE_UNIT_WIDTH == 16
    *ptr = swap_uint16(*ptr);
-#elif defined COMPILE_PCRE32
+#elif PCRE2_CODE_UNIT_WIDTH == 32
    *ptr = swap_uint32(*ptr);
 #endif
-#ifndef COMPILE_PCRE32
+#if PCRE2_CODE_UNIT_WIDTH != 32
    if (LINK_SIZE > 1)
      {
      /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
@ -294,9 +294,9 @@ while(TRUE)
 #endif
    ptr++;
    length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
-#if defined COMPILE_PCRE16
+#if PCRE2_CODE_UNIT_WIDTH == 16
    *ptr = swap_uint16(*ptr);
-#elif defined COMPILE_PCRE32
+#elif PCRE2_CODE_UNIT_WIDTH == 32
    *ptr = swap_uint32(*ptr);
 #endif
    if ((*ptr & XCL_MAP) != 0)
@ -310,7 +310,7 @@ while(TRUE)
  ptr++;
  }
 /* Control should never reach here in 16/32 bit mode. */
-#endif /* !COMPILE_PCRE8 */
+#endif


 #endif  /* NEVER */
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -54,21 +54,22 @@ POSSIBILITY OF SUCH DAMAGE.
 by defining macros in order to minimize #if usage. */

 #if PCRE2_CODE_UNIT_WIDTH == 8
-#define STRING_UTFn_RIGHTPAR        STRING_UTF8_RIGHTPAR, 5
-#define XDIGIT(c)                   xdigitab[c]
+#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */
+#define STRING_UTFn_RIGHTPAR     STRING_UTF8_RIGHTPAR, 5
+#define XDIGIT(c)                xdigitab[c]

 #else  /* Either 16-bit or 32-bit */
-#define XDIGIT(c)                   (MAX_255(c)? xdigitab[c] : 0xff)
+#define XDIGIT(c)                (MAX_255(c)? xdigitab[c] : 0xff)

 #if PCRE2_CODE_UNIT_WIDTH == 16
-#define STRING_UTFn_RIGHTPAR        STRING_UTF16_RIGHTPAR, 6
+#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */
+#define STRING_UTFn_RIGHTPAR     STRING_UTF16_RIGHTPAR, 6

-#else
-#define STRING_UTFn_RIGHTPAR        STRING_UTF32_RIGHTPAR, 6
+#else  /* 32-bit */
+#define STRING_UTFn_RIGHTPAR     STRING_UTF32_RIGHTPAR, 6
 #endif
 #endif

-
 /* Function definitions to allow mutual recursion */

 static int
@ -1308,7 +1309,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
    actual length is stored in the compiled code, so we must update "code"
    here. */

-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    ccode = code += GET(code, 1);
    goto CHECK_CLASS_REPEAT;
@ -1318,7 +1319,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
    case OP_NCLASS:
    ccode = code + PRIV(OP_lengths)[OP_CLASS];

-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
    CHECK_CLASS_REPEAT:
 #endif

@ -1875,7 +1876,7 @@ else
    c -= CHAR_0;
    while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
        c = c * 8 + *(++ptr) - CHAR_0;
-#ifdef COMPILE_PCRE8
+#if PCRE2_CODE_UNIT_WIDTH == 8
    if (!utf && c > 0xff) *errorcodeptr = ERR51;
 #endif
    break;
@ -1894,15 +1895,15 @@ else
        {
        cc = *ptr++;
        if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
-#ifdef COMPILE_PCRE32
+#if PCRE2_CODE_UNIT_WIDTH == 32
        if (c >= 0x20000000l) { overflow = TRUE; break; }
 #endif
        c = (c << 3) + cc - CHAR_0 ;
-#if defined COMPILE_PCRE8
+#if PCRE2_CODE_UNIT_WIDTH == 8
        if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
-#elif defined COMPILE_PCRE16
+#elif PCRE2_CODE_UNIT_WIDTH == 16
        if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
-#elif defined COMPILE_PCRE32
+#elif PCRE2_CODE_UNIT_WIDTH == 32
        if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
 #endif
        }
@ -2241,7 +2242,7 @@ PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
 for (;;)
  {
  register PCRE2_UCHAR c = *code;
-
+  
  if (c == OP_END) return NULL;

  /* XCLASS is used for classes that cannot be represented just by a bit
@ -3039,7 +3040,6 @@ dynamically as we process the pattern. */
 #ifdef SUPPORT_UTF
 BOOL utf = (options & PCRE2_UTF) != 0;
 #if PCRE2_CODE_UNIT_WIDTH != 32
-#define MAYBE_UTF_MULTI        /* UTF chars may use multiple code units */
 PCRE2_UCHAR utf_units[6];      /* For setting up multi-cu chars */
 #endif

@ -7608,7 +7608,7 @@ help in the case when a regex compiled on a system with 4-byte pointers is run
 on another with 8-byte pointers. */

 #ifdef FIXME
-#ifdef COMPILE_PCRE32
+#if PCRE2_CODE_UNIT_WIDTH == 32
 re->dummy = 0;
 #else
 re->dummy1 = re->dummy2 = re->dummy3 = 0;
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@ -632,7 +632,7 @@ for (;;)

    /* If this opcode inspects a character, but we are at the end of the
    subject, remember the fact for use when testing for a partial match. */
-
+    
    if (clen == 0 && poptable[codevalue] != 0)
      could_continue = TRUE;

@ -1400,7 +1400,7 @@ for (;;)
          case 0x2028:
          case 0x2029:
 #endif  /* Not EBCDIC */
-          if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
+          if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
          goto ANYNL01;

          case CHAR_CR:
@ -1669,7 +1669,7 @@ for (;;)
          case 0x2028:
          case 0x2029:
 #endif  /* Not EBCDIC */
-          if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
+          if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
          goto ANYNL02;

          case CHAR_CR:
@ -1939,7 +1939,7 @@ for (;;)
          case 0x2028:
          case 0x2029:
 #endif  /* Not EBCDIC */
-          if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
+          if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
          goto ANYNL03;

          case CHAR_CR:
@ -2121,7 +2121,7 @@ for (;;)
        case 0x2028:
        case 0x2029:
 #endif  /* Not EBCDIC */
-        if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
+        if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;

        case CHAR_LF:
        ADD_NEW(state_offset + 1, 0);
@ -2985,7 +2985,7 @@ for (;;)

  The "could_continue" variable is true if a state could have continued but
  for the fact that the end of the subject was reached. */
-
+  
  if (new_count <= 0)
    {
    if (rlevel == 1 &&                               /* Top level, and */
@ -3378,7 +3378,7 @@ for (;;)

    /* The following two optimizations are disabled for partial matching. */

-    if ((mb->moptions & PCRE2_PARTIAL_HARD & PCRE2_PARTIAL_SOFT) == 0)
+    if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
      {
      /* The minimum matching length is a lower bound; no actual string of that
      length may actually match the pattern. Although the value is, strictly,
@ -3461,7 +3461,7 @@ for (;;)

  /* Anything other than "no match" means we are done, always; otherwise, carry
  on only if not anchored. */
-
+  
  if (rc != PCRE2_ERROR_NOMATCH || anchored)
    {
    if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
@ -3470,6 +3470,8 @@ for (;;)
      match_data->ovector[1] = (PCRE2_OFFSET)(end_subject - subject);
      }
    match_data->leftchar = (PCRE2_OFFSET)(mb->start_used_ptr - subject);
+    match_data->rightchar = 0; /* FIXME */
+    match_data->startchar = (PCRE2_OFFSET)(start_match - subject);  
    match_data->rc = rc; 
    return rc;
    }
--- a/src/pcre2_printint.c
+++ b/src/pcre2_printint.c
@ -90,30 +90,26 @@ static unsigned int
 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
 {
 uint32_t c = *ptr;
-#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
-int a, i, s;
-#endif
+BOOL one_code_unit = !utf;

-/* If UTF is supported and requested, check for a one-code-unit character. The 
-16-bit and 32-bit tests are for malformed UTF, and should only trigger if the 
-sanity check is turned off. */
+/* If UTF is supported and requested, check for a valid single code unit. */

 #ifdef SUPPORT_UTF
 if (utf)
  {
 #if PCRE2_CODE_UNIT_WIDTH == 8
-  utf = (c & 0xc0) == 0xc0;
+  one_code_unit = c < 0x80;
 #elif PCRE2_CODE_UNIT_WIDTH == 16
-  utf = (c & 0xfc00) == 0xd800;
+  one_code_unit = (c & 0xfc00) != 0xd800;
 #else
-  utf = (c & 0xfffff800u) != 0xd800u;
+  one_code_unit = (c & 0xfffff800u) != 0xd800u;
 #endif
  }
 #endif  /* SUPPORT_UTF */  

-/* Handle a one-code-unit character at any width. */
+/* Handle a valid one-code-unit character at any width. */

-if (!utf)
+if (one_code_unit)
  {
  if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
  else if (c < 0x80) fprintf(f, "\\x%02x", c);
@ -121,41 +117,43 @@ if (!utf)
  return 0;
  } 

-/* Per-width code for handling non-one-code-unit UTF characters. */
+/* Per-width code for invalid UTF code units and multi-unit UTF characters. */

 #ifdef SUPPORT_UTF

-/* Handle a multi-byte UTF-8 character. */
+/* Malformed UTF-8 should occur only if the sanity check has been turned off.
+Rather than swallow random bytes, just stop if we hit a bad one. Print it with
+\X instead of \x as an indication. */

 #if PCRE2_CODE_UNIT_WIDTH == 8
-a = utf8_table4[c & 0x3f];  /* Number of additional bytes */
-s = 6*a;
-c = (c & utf8_table3[a]) << s;
-for (i = 1; i <= a; i++)
+if ((c & 0xc0) != 0xc0)
  {
-  /* This is a check for malformed UTF-8; it should only occur if the sanity
-  check has been turned off. Rather than swallow random bytes, just stop if
-  we hit a bad one. Print it with \X instead of \x as an indication. */
-
-  if ((ptr[i] & 0xc0) != 0x80)
+  fprintf(f, "\\X{%x}", c);       /* Invalid starting byte */
+  return 0;
+  }  
+else
+  {
+  int i; 
+  int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */
+  int s = 6*a;
+  c = (c & utf8_table3[a]) << s;
+  for (i = 1; i <= a; i++)
    {
-    fprintf(f, "\\X{%x}", c);
-    return i - 1;
+    if ((ptr[i] & 0xc0) != 0x80)
+      {
+      fprintf(f, "\\X{%x}", c);   /* Invalid secondary byte */
+      return i - 1;
+      }
+    s -= 6;
+    c |= (ptr[i] & 0x3f) << s;
    }
-
-  /* The byte is OK */
-
-  s -= 6;
-  c |= (ptr[i] & 0x3f) << s;
-  }
-fprintf(f, "\\x{%x}", c);
-return a;
+  fprintf(f, "\\x{%x}", c);
+  return a;
+}   
 #endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */

-/* Handle a multi-code-unit UTF-16 character, starting with a check for
-malformed UTF-16; it should only occur if the sanity check has been turned off.
-Rather than swallow a low surrogate, just stop if we hit a bad one. Print it
-with \X instead of \x as an indication. */
+/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
+Print it with \X instead of \x as an indication. */

 #if PCRE2_CODE_UNIT_WIDTH == 16
 if ((ptr[1] & 0xfc00) != 0xdc00)
@ -176,7 +174,7 @@ as an indication. */
 fprintf(f, "\\X{%x}", c);
 return 0;
 #endif  /* PCRE2_CODE_UNIT_WIDTH == 32 */
-#endif /* SUPPORT_UTF */
+#endif  /* SUPPORT_UTF */
 }


--- a/src/pcre2_study.c
+++ b/src/pcre2_study.c
@ -751,7 +751,7 @@ set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
 register uint32_t c;
 int yield = SSB_DONE;

-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
 int table_limit = utf? 16:32;
 #else
 int table_limit = 32;
--- a/testdata/testinput5
+++ b/testdata/testinput5
--- a/testdata/testinput6
+++ b/testdata/testinput6
--- a/testdata/testinput7
+++ b/testdata/testinput7
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@ -47,7 +47,7 @@ Subject length lower bound = 3
    abc
 0: abc
    abc\=anchored
-No match
+ 0: abc
    *** Failers
 No match
    defabc
@ -352,7 +352,7 @@ Subject length lower bound = 3
    abcdef
 0: abc
    abcdef\=notbol
-No match
+ 0: def

 /.*((abc)$|(def))/I
 Capturing subpattern count = 3
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
--- a/testdata/testoutput7
+++ b/testdata/testoutput7