Add more tests.

This commit is contained in:
Philip.Hazel 2014-08-03 17:50:08 +00:00
parent 2addfec25d
commit 8792477279
15 changed files with 24064 additions and 239 deletions

206
RunTest
View File

@ -48,17 +48,16 @@
# Define test titles in variables so that they can be output as a list. Some # Define test titles in variables so that they can be output as a list. Some
# of them are modified (e.g. with -8 or -16) when used in the actual tests. # of them are modified (e.g. with -8 or -16) when used in the actual tests.
title1="Test 1: Main functionality (Compatible with Perl >= 5.10)" title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)"
title2="Test 2: API, errors, internals, and non-Perl stuff" title2="Test 2: API, errors, internals, and non-Perl stuff"
title3="Test 3: Locale-specific features" title3="Test 3: Locale-specific features"
title4A="Test 4: UTF" title4A="Test 4: UTF"
title4B=" and Unicode property support (Compatible with Perl >= 5.10)" title4B=" and Unicode property support (compatible with Perl >= 5.10)"
#title5="Test 5: API, internals, and non-Perl stuff for UTF" title5A="Test 5: API, internals, and non-Perl stuff for UTF"
#title6="Test 6: Unicode property support (Compatible with Perl >= 5.10)" title5B=" and UCP support"
#title7="Test 7: API, internals, and non-Perl stuff for Unicode property support" title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
#title8="Test 8: DFA matching main functionality" title7A="Test 7: DFA matching with UTF"
#title9="Test 9: DFA matching with UTF" title7B=" and Unicode property support"
#title10="Test 10: DFA matching with Unicode properties"
#title11="Test 11: Internal offsets and code size tests" #title11="Test 11: Internal offsets and code size tests"
#title12="Test 12: JIT-specific features (when JIT is available)" #title12="Test 12: JIT-specific features (when JIT is available)"
#title13="Test 13: JIT-specific features (when JIT is not available)" #title13="Test 13: JIT-specific features (when JIT is not available)"
@ -80,12 +79,12 @@ maxtest=2
if [ $# -eq 1 -a "$1" = "list" ]; then if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title1 echo $title1
echo $title2 "(not UTF)" echo $title2 "(not UTF or UCP)"
echo $title3 echo $title3
echo $title4A $title4B echo $title4A $title4B
# echo $title5 support echo $title5A $title5B
# echo $title6 echo $title6
# echo $title7 echo $title7A $title7B
# echo $title8 # echo $title8
# echo $title9 # echo $title9
# echo $title10 # echo $title10
@ -176,9 +175,9 @@ do1=no
do2=no do2=no
do3=no do3=no
do4=no do4=no
#do5=no do5=no
#do6=no do6=no
#do7=no do7=no
#do8=no #do8=no
#do9=no #do9=no
#do10=no #do10=no
@ -205,9 +204,9 @@ while [ $# -gt 0 ] ; do
2) do2=yes;; 2) do2=yes;;
3) do3=yes;; 3) do3=yes;;
4) do4=yes;; 4) do4=yes;;
# 5) do5=yes;; 5) do5=yes;;
# 6) do6=yes;; 6) do6=yes;;
# 7) do7=yes;; 7) do7=yes;;
# 8) do8=yes;; # 8) do8=yes;;
# 9) do9=yes;; # 9) do9=yes;;
# 10) do10=yes;; # 10) do10=yes;;
@ -346,9 +345,10 @@ fi
# If no specific tests were requested, select all. Those that are not # If no specific tests were requested, select all. Those that are not
# relevant will be automatically skipped. # relevant will be automatically skipped.
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no \ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
$do5 = no -a $do6 = no -a $do7 = no \
]; then ]; then
# -a $do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \ # -a $do8 = no -a \
# $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \ # $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
# $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \ # $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \ # $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
@ -359,9 +359,9 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no \
do2=yes do2=yes
do3=yes do3=yes
do4=yes do4=yes
# do5=yes do5=yes
# do6=yes do6=yes
# do7=yes do7=yes
# do8=yes # do8=yes
# do9=yes # do9=yes
# do10=yes # do10=yes
@ -425,7 +425,7 @@ fi
# PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals # PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals
if [ $do2 = yes ] ; then if [ $do2 = yes ] ; then
echo $title2 "(not UTF-$bits)" echo $title2 "(excluding UTF-$bits)"
for opt in "" $jitopt; do for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then if [ $? = 0 ] ; then
@ -537,117 +537,53 @@ if [ $do4 = yes ] ; then
fi fi
fi fi
#if [ $do5 = yes ] ; then if [ $do5 = yes ] ; then
# echo ${title5}-${bits} support echo ${title5A}-${bits}$title5B
# if [ $utf -eq 0 ] ; then if [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available" echo " Skipped because UTF-$bits support is not available"
# else else
# for opt in "" "-s" $jitopt; do for opt in "" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
# if [ $? = 0 ] ; then if [ $? = 0 ] ; then
# $cf $testdata/testoutput5 testtry $cf $testdata/testoutput5 testtry
# if [ $? != 0 ] ; then exit 1; fi if [ $? != 0 ] ; then exit 1; fi
# else exit 1 else exit 1
# fi fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study" else echo " OK"
# else echo " OK" fi
# fi done
# done fi
# fi fi
#fi
# # Tests for DFA matching support
#if [ $do6 = yes ] ; then
# echo $title6 if [ $do6 = yes ] ; then
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then echo $title6
# echo " Skipped because Unicode property support is not available" $sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
# else if [ $? = 0 ] ; then
# for opt in "" "-s" $jitopt; do $cf $testdata/testoutput6 testtry
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput6 testtry if [ $? != 0 ] ; then exit 1; fi
# if [ $? = 0 ] ; then else exit 1
# $cf $testdata/testoutput6 testtry fi
# if [ $? != 0 ] ; then exit 1; fi echo " OK"
# else exit 1 fi
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" if [ $do7 = yes ] ; then
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study" echo ${title7A}-${bits}$title7B
# else echo " OK" if [ $utf -eq 0 ] ; then
# fi echo " Skipped because UTF-$bits support is not available"
# done else
# fi $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
#fi if [ $? = 0 ] ; then
# $cf $testdata/testoutput7 testtry
## Test non-Perl-compatible Unicode property support if [ $? != 0 ] ; then exit 1; fi
# else exit 1
#if [ $do7 = yes ] ; then fi
# echo $title7 echo " OK"
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then fi
# echo " Skipped because Unicode property support is not available" fi
# else
# for opt in "" "-s" $jitopt; do
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput7 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study"
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
# else echo " OK"
# fi
# done
# fi
#fi
#
## Tests for DFA matching support
#
#if [ $do8 = yes ] ; then
# echo $title8
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput8 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput8 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
#fi
#
#if [ $do9 = yes ] ; then
# echo ${title9}-${bits}
# if [ $utf -eq 0 ] ; then
# echo " Skipped because UTF-$bits support is not available"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput9 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput9 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
# fi
#fi
#
#if [ $do10 = yes ] ; then
# echo $title10
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
# echo " Skipped because Unicode property support is not available"
# else
# for opt in "" "-s"; do
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput10 testtry
# if [ $? = 0 ] ; then
# $cf $testdata/testoutput10 testtry
# if [ $? != 0 ] ; then exit 1; fi
# else exit 1
# fi
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
# done
# fi
#fi
#
## Test of internal offsets and code sizes. This test is run only when there ## Test of internal offsets and code sizes. This test is run only when there
## is Unicode property support and the link size is 2. The actual tests are ## is Unicode property support and the link size is 2. The actual tests are
## mostly the same as in some of the above, but in this test we inspect some ## mostly the same as in some of the above, but in this test we inspect some

View File

@ -123,19 +123,21 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002 #define PCRE2_JIT_PARTIAL_SOFT 0x00000002
#define PCRE2_JIT_PARTIAL_HARD 0x00000004 #define PCRE2_JIT_PARTIAL_HARD 0x00000004
/* These are for pcre2_match() and pcre2_dfa_match(). */ /* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED,
PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these
functions, so take care not to define synonyms by mistake. */
#define PCRE2_NOTBOL 0x00000001 #define PCRE2_NOTBOL 0x00000008
#define PCRE2_NOTEOL 0x00000002 #define PCRE2_NOTEOL 0x00000010
#define PCRE2_NOTEMPTY 0x00000004 #define PCRE2_NOTEMPTY 0x00000020
#define PCRE2_NOTEMPTY_ATSTART 0x00000008 #define PCRE2_NOTEMPTY_ATSTART 0x00000040
#define PCRE2_PARTIAL_SOFT 0x00000010 #define PCRE2_PARTIAL_SOFT 0x00000080
#define PCRE2_PARTIAL_HARD 0x00000020 #define PCRE2_PARTIAL_HARD 0x00000100
/* These are additional options for pcre2_dfa_match(). */ /* These are additional options for pcre2_dfa_match(). */
#define PCRE2_DFA_RESTART 0x00000040 #define PCRE2_DFA_RESTART 0x00000200
#define PCRE2_DFA_SHORTEST 0x00000080 #define PCRE2_DFA_SHORTEST 0x00000400
/* Newline and \R settings, for use in the compile and match contexts. The /* Newline and \R settings, for use in the compile and match contexts. The
newline values must be kept in step with values set in config.h and both sets newline values must be kept in step with values set in config.h and both sets

View File

@ -123,19 +123,21 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002 #define PCRE2_JIT_PARTIAL_SOFT 0x00000002
#define PCRE2_JIT_PARTIAL_HARD 0x00000004 #define PCRE2_JIT_PARTIAL_HARD 0x00000004
/* These are for pcre2_match() and pcre2_dfa_match(). */ /* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED,
PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these
functions, so take care not to define synonyms by mistake. */
#define PCRE2_NOTBOL 0x00000001 #define PCRE2_NOTBOL 0x00000008
#define PCRE2_NOTEOL 0x00000002 #define PCRE2_NOTEOL 0x00000010
#define PCRE2_NOTEMPTY 0x00000004 #define PCRE2_NOTEMPTY 0x00000020
#define PCRE2_NOTEMPTY_ATSTART 0x00000008 #define PCRE2_NOTEMPTY_ATSTART 0x00000040
#define PCRE2_PARTIAL_SOFT 0x00000010 #define PCRE2_PARTIAL_SOFT 0x00000080
#define PCRE2_PARTIAL_HARD 0x00000020 #define PCRE2_PARTIAL_HARD 0x00000100
/* These are additional options for pcre2_dfa_match(). */ /* These are additional options for pcre2_dfa_match(). */
#define PCRE2_DFA_RESTART 0x00000040 #define PCRE2_DFA_RESTART 0x00000200
#define PCRE2_DFA_SHORTEST 0x00000080 #define PCRE2_DFA_SHORTEST 0x00000400
/* Newline and \R settings, for use in the compile and match contexts. The /* Newline and \R settings, for use in the compile and match contexts. The
newline values must be kept in step with values set in config.h and both sets newline values must be kept in step with values set in config.h and both sets

View File

@ -107,14 +107,14 @@ return -1;
REAL_PCRE *re = (REAL_PCRE *)argument_re; REAL_PCRE *re = (REAL_PCRE *)argument_re;
pcre_study_data *study; pcre_study_data *study;
#ifndef COMPILE_PCRE8 #if PCRE2_CODE_UNIT_WIDTH != 8
pcre_uchar *ptr; pcre_uchar *ptr;
int length; int length;
#if defined SUPPORT_UTF && defined COMPILE_PCRE16 #if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
BOOL utf; BOOL utf;
BOOL utf16_char; BOOL utf16_char;
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */ #endif
#endif /* !COMPILE_PCRE8 */ #endif
if (re == NULL) return PCRE_ERROR_NULL; if (re == NULL) return PCRE_ERROR_NULL;
if (re->magic_number == MAGIC_NUMBER) if (re->magic_number == MAGIC_NUMBER)
@ -134,10 +134,10 @@ re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match); re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion); re->limit_recursion = swap_uint32(re->limit_recursion);
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 #if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
re->first_char = swap_uint16(re->first_char); re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char); re->req_char = swap_uint16(re->req_char);
#elif defined COMPILE_PCRE32 #elif PCRE2_CODE_UNIT_WIDTH == 32
re->first_char = swap_uint32(re->first_char); re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char); re->req_char = swap_uint32(re->req_char);
#endif #endif
@ -159,27 +159,27 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
study->minlength = swap_uint32(study->minlength); study->minlength = swap_uint32(study->minlength);
} }
#ifndef COMPILE_PCRE8 #if PCRE2_CODE_UNIT_WIDTH != 8
ptr = (pcre_uchar *)re + re->name_table_offset; ptr = (pcre_uchar *)re + re->name_table_offset;
length = re->name_count * re->name_entry_size; length = re->name_count * re->name_entry_size;
#if defined SUPPORT_UTF && defined COMPILE_PCRE16 #if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
utf = (re->options & PCRE_UTF16) != 0; utf = (re->options & PCRE_UTF16) != 0;
utf16_char = FALSE; utf16_char = FALSE;
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */ #endif
while(TRUE) while(TRUE)
{ {
/* Swap previous characters. */ /* Swap previous characters. */
while (length-- > 0) while (length-- > 0)
{ {
#if defined COMPILE_PCRE16 #if PCRE2_CODE_UNIT_WIDTH == 16
*ptr = swap_uint16(*ptr); *ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32 #elif PCRE2_CODE_UNIT_WIDTH == 32
*ptr = swap_uint32(*ptr); *ptr = swap_uint32(*ptr);
#endif #endif
ptr++; ptr++;
} }
#if defined SUPPORT_UTF && defined COMPILE_PCRE16 #if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
if (utf16_char) if (utf16_char)
{ {
if (HAS_EXTRALEN(ptr[-1])) if (HAS_EXTRALEN(ptr[-1]))
@ -194,9 +194,9 @@ while(TRUE)
/* Get next opcode. */ /* Get next opcode. */
length = 0; length = 0;
#if defined COMPILE_PCRE16 #if PCRE2_CODE_UNIT_WIDTH == 16
*ptr = swap_uint16(*ptr); *ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32 #elif PCRE2_CODE_UNIT_WIDTH == 32
*ptr = swap_uint32(*ptr); *ptr = swap_uint32(*ptr);
#endif #endif
switch (*ptr) switch (*ptr)
@ -204,7 +204,7 @@ while(TRUE)
case OP_END: case OP_END:
return 0; return 0;
#if defined SUPPORT_UTF && defined COMPILE_PCRE16 #if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
case OP_CHAR: case OP_CHAR:
case OP_CHARI: case OP_CHARI:
case OP_NOT: case OP_NOT:
@ -279,12 +279,12 @@ while(TRUE)
case OP_XCLASS: case OP_XCLASS:
/* Reverse the size of the XCLASS instance. */ /* Reverse the size of the XCLASS instance. */
ptr++; ptr++;
#if defined COMPILE_PCRE16 #if PCRE2_CODE_UNIT_WIDTH == 16
*ptr = swap_uint16(*ptr); *ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32 #elif PCRE2_CODE_UNIT_WIDTH == 32
*ptr = swap_uint32(*ptr); *ptr = swap_uint32(*ptr);
#endif #endif
#ifndef COMPILE_PCRE32 #if PCRE2_CODE_UNIT_WIDTH != 32
if (LINK_SIZE > 1) if (LINK_SIZE > 1)
{ {
/* LINK_SIZE can be 1 or 2 in 16 bit mode. */ /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
@ -294,9 +294,9 @@ while(TRUE)
#endif #endif
ptr++; ptr++;
length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1); length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
#if defined COMPILE_PCRE16 #if PCRE2_CODE_UNIT_WIDTH == 16
*ptr = swap_uint16(*ptr); *ptr = swap_uint16(*ptr);
#elif defined COMPILE_PCRE32 #elif PCRE2_CODE_UNIT_WIDTH == 32
*ptr = swap_uint32(*ptr); *ptr = swap_uint32(*ptr);
#endif #endif
if ((*ptr & XCL_MAP) != 0) if ((*ptr & XCL_MAP) != 0)
@ -310,7 +310,7 @@ while(TRUE)
ptr++; ptr++;
} }
/* Control should never reach here in 16/32 bit mode. */ /* Control should never reach here in 16/32 bit mode. */
#endif /* !COMPILE_PCRE8 */ #endif
#endif /* NEVER */ #endif /* NEVER */

View File

@ -54,21 +54,22 @@ POSSIBILITY OF SUCH DAMAGE.
by defining macros in order to minimize #if usage. */ by defining macros in order to minimize #if usage. */
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
#define STRING_UTFn_RIGHTPAR STRING_UTF8_RIGHTPAR, 5 #define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
#define XDIGIT(c) xdigitab[c] #define STRING_UTFn_RIGHTPAR STRING_UTF8_RIGHTPAR, 5
#define XDIGIT(c) xdigitab[c]
#else /* Either 16-bit or 32-bit */ #else /* Either 16-bit or 32-bit */
#define XDIGIT(c) (MAX_255(c)? xdigitab[c] : 0xff) #define XDIGIT(c) (MAX_255(c)? xdigitab[c] : 0xff)
#if PCRE2_CODE_UNIT_WIDTH == 16 #if PCRE2_CODE_UNIT_WIDTH == 16
#define STRING_UTFn_RIGHTPAR STRING_UTF16_RIGHTPAR, 6 #define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
#define STRING_UTFn_RIGHTPAR STRING_UTF16_RIGHTPAR, 6
#else #else /* 32-bit */
#define STRING_UTFn_RIGHTPAR STRING_UTF32_RIGHTPAR, 6 #define STRING_UTFn_RIGHTPAR STRING_UTF32_RIGHTPAR, 6
#endif #endif
#endif #endif
/* Function definitions to allow mutual recursion */ /* Function definitions to allow mutual recursion */
static int static int
@ -1308,7 +1309,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
actual length is stored in the compiled code, so we must update "code" actual length is stored in the compiled code, so we must update "code"
here. */ here. */
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 #if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
case OP_XCLASS: case OP_XCLASS:
ccode = code += GET(code, 1); ccode = code += GET(code, 1);
goto CHECK_CLASS_REPEAT; goto CHECK_CLASS_REPEAT;
@ -1318,7 +1319,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
case OP_NCLASS: case OP_NCLASS:
ccode = code + PRIV(OP_lengths)[OP_CLASS]; ccode = code + PRIV(OP_lengths)[OP_CLASS];
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 #if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
CHECK_CLASS_REPEAT: CHECK_CLASS_REPEAT:
#endif #endif
@ -1875,7 +1876,7 @@ else
c -= CHAR_0; c -= CHAR_0;
while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
c = c * 8 + *(++ptr) - CHAR_0; c = c * 8 + *(++ptr) - CHAR_0;
#ifdef COMPILE_PCRE8 #if PCRE2_CODE_UNIT_WIDTH == 8
if (!utf && c > 0xff) *errorcodeptr = ERR51; if (!utf && c > 0xff) *errorcodeptr = ERR51;
#endif #endif
break; break;
@ -1894,15 +1895,15 @@ else
{ {
cc = *ptr++; cc = *ptr++;
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
#ifdef COMPILE_PCRE32 #if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x20000000l) { overflow = TRUE; break; } if (c >= 0x20000000l) { overflow = TRUE; break; }
#endif #endif
c = (c << 3) + cc - CHAR_0 ; c = (c << 3) + cc - CHAR_0 ;
#if defined COMPILE_PCRE8 #if PCRE2_CODE_UNIT_WIDTH == 8
if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
#elif defined COMPILE_PCRE16 #elif PCRE2_CODE_UNIT_WIDTH == 16
if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; } if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
#elif defined COMPILE_PCRE32 #elif PCRE2_CODE_UNIT_WIDTH == 32
if (utf && c > 0x10ffffU) { overflow = TRUE; break; } if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
#endif #endif
} }
@ -2241,7 +2242,7 @@ PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
for (;;) for (;;)
{ {
register PCRE2_UCHAR c = *code; register PCRE2_UCHAR c = *code;
if (c == OP_END) return NULL; if (c == OP_END) return NULL;
/* XCLASS is used for classes that cannot be represented just by a bit /* XCLASS is used for classes that cannot be represented just by a bit
@ -3039,7 +3040,6 @@ dynamically as we process the pattern. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
BOOL utf = (options & PCRE2_UTF) != 0; BOOL utf = (options & PCRE2_UTF) != 0;
#if PCRE2_CODE_UNIT_WIDTH != 32 #if PCRE2_CODE_UNIT_WIDTH != 32
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */ PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */
#endif #endif
@ -7608,7 +7608,7 @@ help in the case when a regex compiled on a system with 4-byte pointers is run
on another with 8-byte pointers. */ on another with 8-byte pointers. */
#ifdef FIXME #ifdef FIXME
#ifdef COMPILE_PCRE32 #if PCRE2_CODE_UNIT_WIDTH == 32
re->dummy = 0; re->dummy = 0;
#else #else
re->dummy1 = re->dummy2 = re->dummy3 = 0; re->dummy1 = re->dummy2 = re->dummy3 = 0;

View File

@ -632,7 +632,7 @@ for (;;)
/* If this opcode inspects a character, but we are at the end of the /* If this opcode inspects a character, but we are at the end of the
subject, remember the fact for use when testing for a partial match. */ subject, remember the fact for use when testing for a partial match. */
if (clen == 0 && poptable[codevalue] != 0) if (clen == 0 && poptable[codevalue] != 0)
could_continue = TRUE; could_continue = TRUE;
@ -1400,7 +1400,7 @@ for (;;)
case 0x2028: case 0x2028:
case 0x2029: case 0x2029:
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break; if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
goto ANYNL01; goto ANYNL01;
case CHAR_CR: case CHAR_CR:
@ -1669,7 +1669,7 @@ for (;;)
case 0x2028: case 0x2028:
case 0x2029: case 0x2029:
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break; if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
goto ANYNL02; goto ANYNL02;
case CHAR_CR: case CHAR_CR:
@ -1939,7 +1939,7 @@ for (;;)
case 0x2028: case 0x2028:
case 0x2029: case 0x2029:
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break; if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
goto ANYNL03; goto ANYNL03;
case CHAR_CR: case CHAR_CR:
@ -2121,7 +2121,7 @@ for (;;)
case 0x2028: case 0x2028:
case 0x2029: case 0x2029:
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break; if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
case CHAR_LF: case CHAR_LF:
ADD_NEW(state_offset + 1, 0); ADD_NEW(state_offset + 1, 0);
@ -2985,7 +2985,7 @@ for (;;)
The "could_continue" variable is true if a state could have continued but The "could_continue" variable is true if a state could have continued but
for the fact that the end of the subject was reached. */ for the fact that the end of the subject was reached. */
if (new_count <= 0) if (new_count <= 0)
{ {
if (rlevel == 1 && /* Top level, and */ if (rlevel == 1 && /* Top level, and */
@ -3378,7 +3378,7 @@ for (;;)
/* The following two optimizations are disabled for partial matching. */ /* The following two optimizations are disabled for partial matching. */
if ((mb->moptions & PCRE2_PARTIAL_HARD & PCRE2_PARTIAL_SOFT) == 0) if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
{ {
/* The minimum matching length is a lower bound; no actual string of that /* The minimum matching length is a lower bound; no actual string of that
length may actually match the pattern. Although the value is, strictly, length may actually match the pattern. Although the value is, strictly,
@ -3461,7 +3461,7 @@ for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry /* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */ on only if not anchored. */
if (rc != PCRE2_ERROR_NOMATCH || anchored) if (rc != PCRE2_ERROR_NOMATCH || anchored)
{ {
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0) if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
@ -3470,6 +3470,8 @@ for (;;)
match_data->ovector[1] = (PCRE2_OFFSET)(end_subject - subject); match_data->ovector[1] = (PCRE2_OFFSET)(end_subject - subject);
} }
match_data->leftchar = (PCRE2_OFFSET)(mb->start_used_ptr - subject); match_data->leftchar = (PCRE2_OFFSET)(mb->start_used_ptr - subject);
match_data->rightchar = 0; /* FIXME */
match_data->startchar = (PCRE2_OFFSET)(start_match - subject);
match_data->rc = rc; match_data->rc = rc;
return rc; return rc;
} }

View File

@ -90,30 +90,26 @@ static unsigned int
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf) print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
{ {
uint32_t c = *ptr; uint32_t c = *ptr;
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8 BOOL one_code_unit = !utf;
int a, i, s;
#endif
/* If UTF is supported and requested, check for a one-code-unit character. The /* If UTF is supported and requested, check for a valid single code unit. */
16-bit and 32-bit tests are for malformed UTF, and should only trigger if the
sanity check is turned off. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
if (utf) if (utf)
{ {
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
utf = (c & 0xc0) == 0xc0; one_code_unit = c < 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16 #elif PCRE2_CODE_UNIT_WIDTH == 16
utf = (c & 0xfc00) == 0xd800; one_code_unit = (c & 0xfc00) != 0xd800;
#else #else
utf = (c & 0xfffff800u) != 0xd800u; one_code_unit = (c & 0xfffff800u) != 0xd800u;
#endif #endif
} }
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
/* Handle a one-code-unit character at any width. */ /* Handle a valid one-code-unit character at any width. */
if (!utf) if (one_code_unit)
{ {
if (PRINTABLE(c)) fprintf(f, "%c", (char)c); if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
else if (c < 0x80) fprintf(f, "\\x%02x", c); else if (c < 0x80) fprintf(f, "\\x%02x", c);
@ -121,41 +117,43 @@ if (!utf)
return 0; return 0;
} }
/* Per-width code for handling non-one-code-unit UTF characters. */ /* Per-width code for invalid UTF code units and multi-unit UTF characters. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
/* Handle a multi-byte UTF-8 character. */ /* Malformed UTF-8 should occur only if the sanity check has been turned off.
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
\X instead of \x as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ if ((c & 0xc0) != 0xc0)
s = 6*a;
c = (c & utf8_table3[a]) << s;
for (i = 1; i <= a; i++)
{ {
/* This is a check for malformed UTF-8; it should only occur if the sanity fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
check has been turned off. Rather than swallow random bytes, just stop if return 0;
we hit a bad one. Print it with \X instead of \x as an indication. */ }
else
if ((ptr[i] & 0xc0) != 0x80) {
int i;
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
int s = 6*a;
c = (c & utf8_table3[a]) << s;
for (i = 1; i <= a; i++)
{ {
fprintf(f, "\\X{%x}", c); if ((ptr[i] & 0xc0) != 0x80)
return i - 1; {
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
return i - 1;
}
s -= 6;
c |= (ptr[i] & 0x3f) << s;
} }
fprintf(f, "\\x{%x}", c);
/* The byte is OK */ return a;
}
s -= 6;
c |= (ptr[i] & 0x3f) << s;
}
fprintf(f, "\\x{%x}", c);
return a;
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
/* Handle a multi-code-unit UTF-16 character, starting with a check for /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
malformed UTF-16; it should only occur if the sanity check has been turned off. Print it with \X instead of \x as an indication. */
Rather than swallow a low surrogate, just stop if we hit a bad one. Print it
with \X instead of \x as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 16 #if PCRE2_CODE_UNIT_WIDTH == 16
if ((ptr[1] & 0xfc00) != 0xdc00) if ((ptr[1] & 0xfc00) != 0xdc00)
@ -176,7 +174,7 @@ as an indication. */
fprintf(f, "\\X{%x}", c); fprintf(f, "\\X{%x}", c);
return 0; return 0;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
} }

View File

@ -751,7 +751,7 @@ set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
register uint32_t c; register uint32_t c;
int yield = SSB_DONE; int yield = SSB_DONE;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 #if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
int table_limit = utf? 16:32; int table_limit = utf? 16:32;
#else #else
int table_limit = 32; int table_limit = 32;

1569
testdata/testinput5 vendored Normal file

File diff suppressed because it is too large Load Diff

4786
testdata/testinput6 vendored Normal file

File diff suppressed because it is too large Load Diff

2126
testdata/testinput7 vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -47,7 +47,7 @@ Subject length lower bound = 3
abc abc
0: abc 0: abc
abc\=anchored abc\=anchored
No match 0: abc
*** Failers *** Failers
No match No match
defabc defabc
@ -352,7 +352,7 @@ Subject length lower bound = 3
abcdef abcdef
0: abc 0: abc
abcdef\=notbol abcdef\=notbol
No match 0: def
/.*((abc)$|(def))/I /.*((abc)$|(def))/I
Capturing subpattern count = 3 Capturing subpattern count = 3

3963
testdata/testoutput5 vendored Normal file

File diff suppressed because it is too large Load Diff

7669
testdata/testoutput6 vendored Normal file

File diff suppressed because it is too large Load Diff

3772
testdata/testoutput7 vendored Normal file

File diff suppressed because it is too large Load Diff