Add more tests.
This commit is contained in:
parent
2addfec25d
commit
8792477279
206
RunTest
206
RunTest
|
@ -48,17 +48,16 @@
|
|||
# Define test titles in variables so that they can be output as a list. Some
|
||||
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
|
||||
|
||||
title1="Test 1: Main functionality (Compatible with Perl >= 5.10)"
|
||||
title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)"
|
||||
title2="Test 2: API, errors, internals, and non-Perl stuff"
|
||||
title3="Test 3: Locale-specific features"
|
||||
title4A="Test 4: UTF"
|
||||
title4B=" and Unicode property support (Compatible with Perl >= 5.10)"
|
||||
#title5="Test 5: API, internals, and non-Perl stuff for UTF"
|
||||
#title6="Test 6: Unicode property support (Compatible with Perl >= 5.10)"
|
||||
#title7="Test 7: API, internals, and non-Perl stuff for Unicode property support"
|
||||
#title8="Test 8: DFA matching main functionality"
|
||||
#title9="Test 9: DFA matching with UTF"
|
||||
#title10="Test 10: DFA matching with Unicode properties"
|
||||
title4B=" and Unicode property support (compatible with Perl >= 5.10)"
|
||||
title5A="Test 5: API, internals, and non-Perl stuff for UTF"
|
||||
title5B=" and UCP support"
|
||||
title6="Test 6: DFA matching main non-UTF, non-UCP functionality"
|
||||
title7A="Test 7: DFA matching with UTF"
|
||||
title7B=" and Unicode property support"
|
||||
#title11="Test 11: Internal offsets and code size tests"
|
||||
#title12="Test 12: JIT-specific features (when JIT is available)"
|
||||
#title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||
|
@ -80,12 +79,12 @@ maxtest=2
|
|||
|
||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||
echo $title1
|
||||
echo $title2 "(not UTF)"
|
||||
echo $title2 "(not UTF or UCP)"
|
||||
echo $title3
|
||||
echo $title4A $title4B
|
||||
# echo $title5 support
|
||||
# echo $title6
|
||||
# echo $title7
|
||||
echo $title5A $title5B
|
||||
echo $title6
|
||||
echo $title7A $title7B
|
||||
# echo $title8
|
||||
# echo $title9
|
||||
# echo $title10
|
||||
|
@ -176,9 +175,9 @@ do1=no
|
|||
do2=no
|
||||
do3=no
|
||||
do4=no
|
||||
#do5=no
|
||||
#do6=no
|
||||
#do7=no
|
||||
do5=no
|
||||
do6=no
|
||||
do7=no
|
||||
#do8=no
|
||||
#do9=no
|
||||
#do10=no
|
||||
|
@ -205,9 +204,9 @@ while [ $# -gt 0 ] ; do
|
|||
2) do2=yes;;
|
||||
3) do3=yes;;
|
||||
4) do4=yes;;
|
||||
# 5) do5=yes;;
|
||||
# 6) do6=yes;;
|
||||
# 7) do7=yes;;
|
||||
5) do5=yes;;
|
||||
6) do6=yes;;
|
||||
7) do7=yes;;
|
||||
# 8) do8=yes;;
|
||||
# 9) do9=yes;;
|
||||
# 10) do10=yes;;
|
||||
|
@ -346,9 +345,10 @@ fi
|
|||
# If no specific tests were requested, select all. Those that are not
|
||||
# relevant will be automatically skipped.
|
||||
|
||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no \
|
||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||
$do5 = no -a $do6 = no -a $do7 = no \
|
||||
]; then
|
||||
# -a $do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
||||
# -a $do8 = no -a \
|
||||
# $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \
|
||||
# $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \
|
||||
# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \
|
||||
|
@ -359,9 +359,9 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no \
|
|||
do2=yes
|
||||
do3=yes
|
||||
do4=yes
|
||||
# do5=yes
|
||||
# do6=yes
|
||||
# do7=yes
|
||||
do5=yes
|
||||
do6=yes
|
||||
do7=yes
|
||||
# do8=yes
|
||||
# do9=yes
|
||||
# do10=yes
|
||||
|
@ -425,7 +425,7 @@ fi
|
|||
# PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals
|
||||
|
||||
if [ $do2 = yes ] ; then
|
||||
echo $title2 "(not UTF-$bits)"
|
||||
echo $title2 "(excluding UTF-$bits)"
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
|
@ -537,117 +537,53 @@ if [ $do4 = yes ] ; then
|
|||
fi
|
||||
fi
|
||||
|
||||
#if [ $do5 = yes ] ; then
|
||||
# echo ${title5}-${bits} support
|
||||
# if [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput5 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do6 = yes ] ; then
|
||||
# echo $title6
|
||||
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput6 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput6 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Test non-Perl-compatible Unicode property support
|
||||
#
|
||||
#if [ $do7 = yes ] ; then
|
||||
# echo $title7
|
||||
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s" $jitopt; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput7 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
# elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
# else echo " OK"
|
||||
# fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
## Tests for DFA matching support
|
||||
#
|
||||
#if [ $do8 = yes ] ; then
|
||||
# echo $title8
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput8 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput8 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
#fi
|
||||
#
|
||||
#if [ $do9 = yes ] ; then
|
||||
# echo ${title9}-${bits}
|
||||
# if [ $utf -eq 0 ] ; then
|
||||
# echo " Skipped because UTF-$bits support is not available"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput9 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput9 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
#if [ $do10 = yes ] ; then
|
||||
# echo $title10
|
||||
# if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
|
||||
# echo " Skipped because Unicode property support is not available"
|
||||
# else
|
||||
# for opt in "" "-s"; do
|
||||
# $sim $valgrind ./pcre2test -q $bmode $opt -dfa $testdata/testinput10 testtry
|
||||
# if [ $? = 0 ] ; then
|
||||
# $cf $testdata/testoutput10 testtry
|
||||
# if [ $? != 0 ] ; then exit 1; fi
|
||||
# else exit 1
|
||||
# fi
|
||||
# if [ "$opt" = "-s" ] ; then echo " OK with study" ; else echo " OK"; fi
|
||||
# done
|
||||
# fi
|
||||
#fi
|
||||
#
|
||||
if [ $do5 = yes ] ; then
|
||||
echo ${title5A}-${bits}$title5B
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput5 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
if [ "$opt" = "-jit" ] ; then echo " OK with JIT"
|
||||
else echo " OK"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# Tests for DFA matching support
|
||||
|
||||
if [ $do6 = yes ] ; then
|
||||
echo $title6
|
||||
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput6 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo " OK"
|
||||
fi
|
||||
|
||||
if [ $do7 = yes ] ; then
|
||||
echo ${title7A}-${bits}$title7B
|
||||
if [ $utf -eq 0 ] ; then
|
||||
echo " Skipped because UTF-$bits support is not available"
|
||||
else
|
||||
# Run the UTF DFA test once. $opt is deliberately not passed: this command is
# not inside a "for opt in ..." loop, so $opt would only hold a stale value
# left over from an earlier test's loop (or be unset).
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput7 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $testdata/testoutput7 testtry
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo " OK"
|
||||
fi
|
||||
fi
|
||||
|
||||
## Test of internal offsets and code sizes. This test is run only when there
|
||||
## is Unicode property support and the link size is 2. The actual tests are
|
||||
## mostly the same as in some of the above, but in this test we inspect some
|
||||
|
|
20
src/pcre2.h
20
src/pcre2.h
|
@ -123,19 +123,21 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004
|
||||
|
||||
/* These are for pcre2_match() and pcre2_dfa_match(). */
|
||||
/* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED,
|
||||
PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
functions, so take care not to define synonyms by mistake. */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001
|
||||
#define PCRE2_NOTEOL 0x00000002
|
||||
#define PCRE2_NOTEMPTY 0x00000004
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020
|
||||
#define PCRE2_NOTBOL 0x00000008
|
||||
#define PCRE2_NOTEOL 0x00000010
|
||||
#define PCRE2_NOTEMPTY 0x00000020
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000040
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000080
|
||||
#define PCRE2_PARTIAL_HARD 0x00000100
|
||||
|
||||
/* These are additional options for pcre2_dfa_match(). */
|
||||
|
||||
#define PCRE2_DFA_RESTART 0x00000040
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080
|
||||
#define PCRE2_DFA_RESTART 0x00000200
|
||||
#define PCRE2_DFA_SHORTEST 0x00000400
|
||||
|
||||
/* Newline and \R settings, for use in the compile and match contexts. The
|
||||
newline values must be kept in step with values set in config.h and both sets
|
||||
|
|
|
@ -123,19 +123,21 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004
|
||||
|
||||
/* These are for pcre2_match() and pcre2_dfa_match(). */
|
||||
/* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED,
|
||||
PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
functions, so take care not to define synonyms by mistake. */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001
|
||||
#define PCRE2_NOTEOL 0x00000002
|
||||
#define PCRE2_NOTEMPTY 0x00000004
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020
|
||||
#define PCRE2_NOTBOL 0x00000008
|
||||
#define PCRE2_NOTEOL 0x00000010
|
||||
#define PCRE2_NOTEMPTY 0x00000020
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000040
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000080
|
||||
#define PCRE2_PARTIAL_HARD 0x00000100
|
||||
|
||||
/* These are additional options for pcre2_dfa_match(). */
|
||||
|
||||
#define PCRE2_DFA_RESTART 0x00000040
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080
|
||||
#define PCRE2_DFA_RESTART 0x00000200
|
||||
#define PCRE2_DFA_SHORTEST 0x00000400
|
||||
|
||||
/* Newline and \R settings, for use in the compile and match contexts. The
|
||||
newline values must be kept in step with values set in config.h and both sets
|
||||
|
|
|
@ -107,14 +107,14 @@ return -1;
|
|||
|
||||
REAL_PCRE *re = (REAL_PCRE *)argument_re;
|
||||
pcre_study_data *study;
|
||||
#ifndef COMPILE_PCRE8
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
pcre_uchar *ptr;
|
||||
int length;
|
||||
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
|
||||
BOOL utf;
|
||||
BOOL utf16_char;
|
||||
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
|
||||
#endif /* !COMPILE_PCRE8 */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
if (re->magic_number == MAGIC_NUMBER)
|
||||
|
@ -134,10 +134,10 @@ re->flags = swap_uint32(re->flags);
|
|||
re->limit_match = swap_uint32(re->limit_match);
|
||||
re->limit_recursion = swap_uint32(re->limit_recursion);
|
||||
|
||||
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
|
||||
re->first_char = swap_uint16(re->first_char);
|
||||
re->req_char = swap_uint16(re->req_char);
|
||||
#elif defined COMPILE_PCRE32
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
re->first_char = swap_uint32(re->first_char);
|
||||
re->req_char = swap_uint32(re->req_char);
|
||||
#endif
|
||||
|
@ -159,27 +159,27 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
|||
study->minlength = swap_uint32(study->minlength);
|
||||
}
|
||||
|
||||
#ifndef COMPILE_PCRE8
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
ptr = (pcre_uchar *)re + re->name_table_offset;
|
||||
length = re->name_count * re->name_entry_size;
|
||||
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
|
||||
utf = (re->options & PCRE_UTF16) != 0;
|
||||
utf16_char = FALSE;
|
||||
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
|
||||
#endif
|
||||
|
||||
while(TRUE)
|
||||
{
|
||||
/* Swap previous characters. */
|
||||
while (length-- > 0)
|
||||
{
|
||||
#if defined COMPILE_PCRE16
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
*ptr = swap_uint16(*ptr);
|
||||
#elif defined COMPILE_PCRE32
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
*ptr = swap_uint32(*ptr);
|
||||
#endif
|
||||
ptr++;
|
||||
}
|
||||
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (utf16_char)
|
||||
{
|
||||
if (HAS_EXTRALEN(ptr[-1]))
|
||||
|
@ -194,9 +194,9 @@ while(TRUE)
|
|||
|
||||
/* Get next opcode. */
|
||||
length = 0;
|
||||
#if defined COMPILE_PCRE16
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
*ptr = swap_uint16(*ptr);
|
||||
#elif defined COMPILE_PCRE32
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
*ptr = swap_uint32(*ptr);
|
||||
#endif
|
||||
switch (*ptr)
|
||||
|
@ -204,7 +204,7 @@ while(TRUE)
|
|||
case OP_END:
|
||||
return 0;
|
||||
|
||||
#if defined SUPPORT_UTF && defined COMPILE_PCRE16
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 16
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
|
@ -279,12 +279,12 @@ while(TRUE)
|
|||
case OP_XCLASS:
|
||||
/* Reverse the size of the XCLASS instance. */
|
||||
ptr++;
|
||||
#if defined COMPILE_PCRE16
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
*ptr = swap_uint16(*ptr);
|
||||
#elif defined COMPILE_PCRE32
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
*ptr = swap_uint32(*ptr);
|
||||
#endif
|
||||
#ifndef COMPILE_PCRE32
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (LINK_SIZE > 1)
|
||||
{
|
||||
/* LINK_SIZE can be 1 or 2 in 16 bit mode. */
|
||||
|
@ -294,9 +294,9 @@ while(TRUE)
|
|||
#endif
|
||||
ptr++;
|
||||
length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1);
|
||||
#if defined COMPILE_PCRE16
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
*ptr = swap_uint16(*ptr);
|
||||
#elif defined COMPILE_PCRE32
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
*ptr = swap_uint32(*ptr);
|
||||
#endif
|
||||
if ((*ptr & XCL_MAP) != 0)
|
||||
|
@ -310,7 +310,7 @@ while(TRUE)
|
|||
ptr++;
|
||||
}
|
||||
/* Control should never reach here in 16/32 bit mode. */
|
||||
#endif /* !COMPILE_PCRE8 */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* NEVER */
|
||||
|
|
|
@ -54,21 +54,22 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
by defining macros in order to minimize #if usage. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define STRING_UTFn_RIGHTPAR STRING_UTF8_RIGHTPAR, 5
|
||||
#define XDIGIT(c) xdigitab[c]
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
#define STRING_UTFn_RIGHTPAR STRING_UTF8_RIGHTPAR, 5
|
||||
#define XDIGIT(c) xdigitab[c]
|
||||
|
||||
#else /* Either 16-bit or 32-bit */
|
||||
#define XDIGIT(c) (MAX_255(c)? xdigitab[c] : 0xff)
|
||||
#define XDIGIT(c) (MAX_255(c)? xdigitab[c] : 0xff)
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define STRING_UTFn_RIGHTPAR STRING_UTF16_RIGHTPAR, 6
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
#define STRING_UTFn_RIGHTPAR STRING_UTF16_RIGHTPAR, 6
|
||||
|
||||
#else
|
||||
#define STRING_UTFn_RIGHTPAR STRING_UTF32_RIGHTPAR, 6
|
||||
#else /* 32-bit */
|
||||
#define STRING_UTFn_RIGHTPAR STRING_UTF32_RIGHTPAR, 6
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* Function definitions to allow mutual recursion */
|
||||
|
||||
static int
|
||||
|
@ -1308,7 +1309,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|||
actual length is stored in the compiled code, so we must update "code"
|
||||
here. */
|
||||
|
||||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||||
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
case OP_XCLASS:
|
||||
ccode = code += GET(code, 1);
|
||||
goto CHECK_CLASS_REPEAT;
|
||||
|
@ -1318,7 +1319,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
|
|||
case OP_NCLASS:
|
||||
ccode = code + PRIV(OP_lengths)[OP_CLASS];
|
||||
|
||||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||||
#if defined SUPPORT_UTF || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
CHECK_CLASS_REPEAT:
|
||||
#endif
|
||||
|
||||
|
@ -1875,7 +1876,7 @@ else
|
|||
c -= CHAR_0;
|
||||
while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
|
||||
c = c * 8 + *(++ptr) - CHAR_0;
|
||||
#ifdef COMPILE_PCRE8
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (!utf && c > 0xff) *errorcodeptr = ERR51;
|
||||
#endif
|
||||
break;
|
||||
|
@ -1894,15 +1895,15 @@ else
|
|||
{
|
||||
cc = *ptr++;
|
||||
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
|
||||
#ifdef COMPILE_PCRE32
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c >= 0x20000000l) { overflow = TRUE; break; }
|
||||
#endif
|
||||
c = (c << 3) + cc - CHAR_0 ;
|
||||
#if defined COMPILE_PCRE8
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
|
||||
#elif defined COMPILE_PCRE16
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
|
||||
#elif defined COMPILE_PCRE32
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
|
||||
#endif
|
||||
}
|
||||
|
@ -3039,7 +3040,6 @@ dynamically as we process the pattern. */
|
|||
#ifdef SUPPORT_UTF
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */
|
||||
#endif
|
||||
|
||||
|
@ -7608,7 +7608,7 @@ help in the case when a regex compiled on a system with 4-byte pointers is run
|
|||
on another with 8-byte pointers. */
|
||||
|
||||
#ifdef FIXME
|
||||
#ifdef COMPILE_PCRE32
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
re->dummy = 0;
|
||||
#else
|
||||
re->dummy1 = re->dummy2 = re->dummy3 = 0;
|
||||
|
|
|
@ -1400,7 +1400,7 @@ for (;;)
|
|||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL01;
|
||||
|
||||
case CHAR_CR:
|
||||
|
@ -1669,7 +1669,7 @@ for (;;)
|
|||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL02;
|
||||
|
||||
case CHAR_CR:
|
||||
|
@ -1939,7 +1939,7 @@ for (;;)
|
|||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
goto ANYNL03;
|
||||
|
||||
case CHAR_CR:
|
||||
|
@ -2121,7 +2121,7 @@ for (;;)
|
|||
case 0x2028:
|
||||
case 0x2029:
|
||||
#endif /* Not EBCDIC */
|
||||
if ((mb->moptions & PCRE2_BSR_ANYCRLF) != 0) break;
|
||||
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
|
||||
|
||||
case CHAR_LF:
|
||||
ADD_NEW(state_offset + 1, 0);
|
||||
|
@ -3378,7 +3378,7 @@ for (;;)
|
|||
|
||||
/* The following two optimizations are disabled for partial matching. */
|
||||
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD & PCRE2_PARTIAL_SOFT) == 0)
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
|
||||
{
|
||||
/* The minimum matching length is a lower bound; no actual string of that
|
||||
length may actually match the pattern. Although the value is, strictly,
|
||||
|
@ -3470,6 +3470,8 @@ for (;;)
|
|||
match_data->ovector[1] = (PCRE2_OFFSET)(end_subject - subject);
|
||||
}
|
||||
match_data->leftchar = (PCRE2_OFFSET)(mb->start_used_ptr - subject);
|
||||
match_data->rightchar = 0; /* FIXME */
|
||||
match_data->startchar = (PCRE2_OFFSET)(start_match - subject);
|
||||
match_data->rc = rc;
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -90,30 +90,26 @@ static unsigned int
|
|||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||
{
|
||||
uint32_t c = *ptr;
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int a, i, s;
|
||||
#endif
|
||||
BOOL one_code_unit = !utf;
|
||||
|
||||
/* If UTF is supported and requested, check for a one-code-unit character. The
|
||||
16-bit and 32-bit tests are for malformed UTF, and should only trigger if the
|
||||
sanity check is turned off. */
|
||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
utf = (c & 0xc0) == 0xc0;
|
||||
one_code_unit = c < 0x80;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
utf = (c & 0xfc00) == 0xd800;
|
||||
one_code_unit = (c & 0xfc00) != 0xd800;
|
||||
#else
|
||||
utf = (c & 0xfffff800u) != 0xd800u;
|
||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
/* Handle a one-code-unit character at any width. */
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
if (!utf)
|
||||
if (one_code_unit)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||
|
@ -121,41 +117,43 @@ if (!utf)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Per-width code for handling non-one-code-unit UTF characters. */
|
||||
/* Per-width code for invalid UTF code units and multi-unit UTF characters. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
/* Handle a multi-byte UTF-8 character. */
|
||||
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
|
||||
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
|
||||
\X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
s = 6*a;
|
||||
c = (c & utf8_table3[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
if ((c & 0xc0) != 0xc0)
|
||||
{
|
||||
/* This is a check for malformed UTF-8; it should only occur if the sanity
|
||||
check has been turned off. Rather than swallow random bytes, just stop if
|
||||
we hit a bad one. Print it with \X instead of \x as an indication. */
|
||||
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
/* The byte is OK */
|
||||
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||
return 0;
|
||||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & utf8_table3[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
|
||||
return i - 1;
|
||||
}
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
/* Handle a multi-code-unit UTF-16 character, starting with a check for
|
||||
malformed UTF-16; it should only occur if the sanity check has been turned off.
|
||||
Rather than swallow a low surrogate, just stop if we hit a bad one. Print it
|
||||
with \X instead of \x as an indication. */
|
||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||
Print it with \X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((ptr[1] & 0xfc00) != 0xdc00)
|
||||
|
@ -176,7 +174,7 @@ as an indication. */
|
|||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UTF */
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -751,7 +751,7 @@ set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
|
|||
register uint32_t c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
|
||||
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int table_limit = utf? 16:32;
|
||||
#else
|
||||
int table_limit = 32;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -47,7 +47,7 @@ Subject length lower bound = 3
|
|||
abc
|
||||
0: abc
|
||||
abc\=anchored
|
||||
No match
|
||||
0: abc
|
||||
*** Failers
|
||||
No match
|
||||
defabc
|
||||
|
@ -352,7 +352,7 @@ Subject length lower bound = 3
|
|||
abcdef
|
||||
0: abc
|
||||
abcdef\=notbol
|
||||
No match
|
||||
0: def
|
||||
|
||||
/.*((abc)$|(def))/I
|
||||
Capturing subpattern count = 3
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue