From 55967f9a4fe6fc022f592e9506b07b0d3bc57b02 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 8 Aug 2014 18:18:18 +0000 Subject: [PATCH] Preparatory work for JIT. --- RunTest | 149 +++++++++++++++++++++++--------------- src/config.h | 8 +-- src/pcre2.h | 3 +- src/pcre2.h.in | 3 +- src/pcre2_config.c | 5 +- src/pcre2_internal.h | 2 + src/pcre2_jit_match.c | 9 ++- src/pcre2_jit_misc.c | 18 +++++ src/pcre2_match.c | 10 ++- src/pcre2grep.c | 6 +- src/pcre2test.c | 14 ++-- testdata/testinput10 | 9 --- testdata/testinput14 | 9 +++ testdata/testinput15 | 87 ++++++++++++++++++++++ testdata/testinput16 | 81 +++++++++++++++++++++ testdata/testinput17 | 17 +++++ testdata/testinput9 | 83 +-------------------- testdata/testoutput10 | 12 ---- testdata/testoutput14 | 21 ++++++ testdata/testoutput15 | 164 ++++++++++++++++++++++++++++++++++++++++++ testdata/testoutput16 | 128 +++++++++++++++++++++++++++++++++ testdata/testoutput17 | 20 ++++++ testdata/testoutput9 | 130 +-------------------------------- 23 files changed, 678 insertions(+), 310 deletions(-) create mode 100644 testdata/testinput14 create mode 100644 testdata/testinput15 create mode 100644 testdata/testinput16 create mode 100644 testdata/testinput17 create mode 100644 testdata/testoutput14 create mode 100644 testdata/testoutput15 create mode 100644 testdata/testoutput16 create mode 100644 testdata/testoutput17 diff --git a/RunTest b/RunTest index e178d81..95b71e6 100755 --- a/RunTest +++ b/RunTest @@ -64,9 +64,10 @@ title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support" title11="Test 11: Specials for the basic 16-bit and 32-bit libraries" title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support" title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries" - -#title12="Test 12: JIT-specific features (when JIT is available)" -#title13="Test 13: JIT-specific features (when JIT is not available)" +title14="Test 14: JIT-specific features (when JIT is not available)" +title15="Test 15: JIT-specific features (when JIT is available)" +title16="Test 16: Tests of the POSIX interface, excluding UTF/UCP" +title17="Test 17: Tests of the POSIX interface with UTF/UCP" #title21="Test 21: Reloads for the basic 16/32-bit library" #title22="Test 22: Reloads for the 16/32-bit library with UTF-16/32 support" @@ -87,10 +88,10 @@ if [ $# -eq 1 -a "$1" = "list" ]; then echo $title11 echo $title12 echo $title13 -# echo $title14 -# echo $title15 -# echo $title16 -# echo $title17 + echo $title14 + echo $title15 + echo $title16 + echo $title17 # echo $title18 # echo $title19 # echo $title20 @@ -176,10 +177,10 @@ do10=no do11=no do12=no do13=no -#do14=no -#do15=no -#do16=no -#do17=no +do14=no +do15=no +do16=no +do17=no #do18=no #do19=no #do20=no @@ -201,10 +202,10 @@ while [ $# -gt 0 ] ; do 11) do11=yes;; 12) do12=yes;; 13) do13=yes;; -# 14) do14=yes;; -# 15) do15=yes;; -# 16) do16=yes;; -# 17) do17=yes;; + 14) do14=yes;; + 15) do15=yes;; + 16) do16=yes;; + 17) do17=yes;; # 18) do18=yes;; # 19) do19=yes;; # 20) do20=yes;; @@ -332,10 +333,10 @@ fi if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \ $do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \ $do9 = no -a $do10 = no -a $do11 = no -a $do12 = no -a \ - $do13 = no \ + $do13 = no -a $do14 = no -a $do15 = no -a $do16 = no -a \ + $do17 = no \ ]; then -# -a $do14 = no -a $do15 = no -a $do16 = no -a \ -# $do17 = no -a $do18 = no -a $do19 = no -a $do20 = no -a \ +# -a $do18 = no -a $do19 = no -a $do20 = no -a \ # $do21 = no -a $do22 = no do1=yes @@ -351,10 +352,10 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \ do11=yes do12=yes do13=yes -# do14=yes -# do15=yes -# do16=yes -# do17=yes + do14=yes + do15=yes + do16=yes + do17=yes # do18=yes # do19=yes # do20=yes @@ -694,40 +695,76 @@ if [ $do13 = yes ] ; then fi fi -## Test JIT-specific features when JIT is available -# -#if [ $do12 = yes ] ; then -# echo $title12 -# if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then -# echo " Skipped because JIT is not available or not usable" -# else -# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput12 testtry -# if [ $? = 0 ] ; then -# $cf $testdata/testoutput12 testtry -# if [ $? != 0 ] ; then exit 1; fi -# else exit 1 -# fi -# echo " OK" -# fi -#fi -# -## Test JIT-specific features when JIT is not available -# -#if [ $do13 = yes ] ; then -# echo $title13 -# if [ $jit -ne 0 ] ; then -# echo " Skipped because JIT is available" -# else -# $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry -# if [ $? = 0 ] ; then -# $cf $testdata/testoutput13 testtry -# if [ $? != 0 ] ; then exit 1; fi -# else exit 1 -# fi -# echo " OK" -# fi -#fi -# +# Test JIT-specific features when JIT is not available + +if [ $do14 = yes ] ; then + echo $title14 + if [ $jit -ne 0 ] ; then + echo " Skipped because JIT is available" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry + if [ $? = 0 ] ; then + $cf $testdata/testoutput14 testtry + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + echo " OK" + fi +fi + +# Test JIT-specific features when JIT is available + +if [ $do15 = yes ] ; then + echo $title15 + if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then + echo " Skipped because JIT is not available or not usable" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry + if [ $? = 0 ] ; then + $cf $testdata/testoutput15 testtry + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + echo " OK" + fi +fi + +# Tests for the POSIX interface without UTF/UCP (8-bit only) + +if [ $do16 = yes ] ; then + echo $title16 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry + if [ $? = 0 ] ; then + $cf $testdata/testoutput16 testtry + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + echo " OK" + fi +fi + +# Tests for the POSIX interface with UTF/UCP (8-bit only) + +if [ $do17 = yes ] ; then + echo $title17 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry + if [ $? = 0 ] ; then + $cf $testdata/testoutput17 testtry + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + echo " OK" + fi +fi + ## Tests for reloads with 16/32-bit library # #if [ $do21 = yes ] ; then diff --git a/src/config.h b/src/config.h index b81449e..5354e9f 100644 --- a/src/config.h +++ b/src/config.h @@ -80,10 +80,10 @@ sure both macros are undefined; an emulation function will then be used. */ #define HAVE_MEMORY_H 1 /* Define if you have POSIX threads libraries and header files. */ -/* #undef HAVE_PTHREAD */ +#define HAVE_PTHREAD 1 /* Have PTHREAD_PRIO_INHERIT. */ -/* #undef HAVE_PTHREAD_PRIO_INHERIT */ +#define HAVE_PTHREAD_PRIO_INHERIT 1 /* Define to 1 if you have the header file. */ #define HAVE_READLINE_HISTORY_H 1 @@ -242,7 +242,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define STDC_HEADERS 1 /* Define to any value to enable support for Just-In-Time compiling. */ -/* #undef SUPPORT_JIT */ +#define SUPPORT_JIT /**/ /* Define to any value to allow pcre2grep to be linked with libbz2, so that it is able to handle .bz2 files. */ @@ -262,7 +262,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define SUPPORT_PCRE16 /**/ /* Define to any value to enable JIT support in pcre2grep. */ -/* #undef SUPPORT_PCRE2GREP_JIT */ +#define SUPPORT_PCRE2GREP_JIT /**/ /* Define to any value to enable the 32 bit PCRE2 library. */ #define SUPPORT_PCRE32 /**/ diff --git a/src/pcre2.h b/src/pcre2.h index ca74493..be214ee 100644 --- a/src/pcre2.h +++ b/src/pcre2.h @@ -467,7 +467,8 @@ PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \ PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \ PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ - pcre2_match_data *, pcre2_jit_stack *); \ + pcre2_match_data *, pcre2_match_context *, \ + pcre2_jit_stack *); \ PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\ PCRE2_EXP_DECL \ pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \ diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 5b4cc15..53115ac 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -467,7 +467,8 @@ PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \ PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \ PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ - pcre2_match_data *, pcre2_jit_stack *); \ + pcre2_match_data *, pcre2_match_context *, \ + pcre2_jit_stack *); \ PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\ PCRE2_EXP_DECL \ pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \ diff --git a/src/pcre2_config.c b/src/pcre2_config.c index a0d544d..4b24407 100644 --- a/src/pcre2_config.c +++ b/src/pcre2_config.c @@ -97,8 +97,11 @@ switch (what) case PCRE2_CONFIG_JITTARGET: #ifdef SUPPORT_JIT -FIXME: This needs re-design. +/* FIXME: This needs re-design. *((const char **)where) = PRIV(jit_get_target)(); +*/ + *((const char **)where) = ""; + #else *((const char **)where) = NULL; #endif diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index f4261eb..590878e 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1838,6 +1838,7 @@ is available. */ #define _pcre2_compile_context_init PCRE2_SUFFIX(_pcre2_compile_context_init_) #define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_) #define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_) +#define _pcre2_jit_get_size PCRE2_SUFFIX(_pcre2_jit_get_size_) #define _pcre2_match_context_init PCRE2_SUFFIX(_pcre2_match_context_init_) #define _pcre2_memctl_malloc PCRE2_SUFFIX(_pcre2_memctl_malloc_) #define _pcre2_ord2utf PCRE2_SUFFIX(_pcre2_ord2utf_) @@ -1856,6 +1857,7 @@ extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); +extern int _pcre2_jit_get_size(void *); extern void _pcre2_match_context_init(pcre2_match_context *, BOOL); extern void *_pcre2_memctl_malloc(size_t, size_t, pcre2_memctl *); extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *); diff --git a/src/pcre2_jit_match.c b/src/pcre2_jit_match.c index b3c65e1..c779387 100644 --- a/src/pcre2_jit_match.c +++ b/src/pcre2_jit_match.c @@ -59,6 +59,7 @@ Arguments: start_offset where to start in the subject string options option bits match_data points to a match_data block + mcontext points to a match context jit_stack points to a JIT stack Returns: > 0 => success; value is the number of ovector pairs filled @@ -72,7 +73,7 @@ Returns: > 0 => success; value is the number of ovector pairs filled PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length, PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data, - pcre2_jit_stack *jit_stack) + pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack) { #ifndef SUPPORT_JIT (void)code; @@ -81,16 +82,18 @@ pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length, (void)start_offset; (void)options; (void)match_data; +(void)mcontext; (void)jit_stack; -return PCRE2_ERROR_NOMATCH; +return PCRE2_ERROR_JIT_BADOPTION; #else /* SUPPORT_JIT */ /* Dummy code */ code=code;subject=subject;length=length; start_offset=start_offset; options=options; match_data=match_data; +mcontext=mcontext; jit_stack=jit_stack; -return PCRE2_ERROR_NOMATCH; +return PCRE2_ERROR_JIT_BADOPTION; #endif /* SUPPORT_JIT */ } diff --git a/src/pcre2_jit_misc.c b/src/pcre2_jit_misc.c index ee4ea88..9c28c72 100644 --- a/src/pcre2_jit_misc.c +++ b/src/pcre2_jit_misc.c @@ -122,4 +122,22 @@ jit_stack=jit_stack; #endif /* SUPPORT_JIT */ } + +/************************************************* +* Get size of JIT code * +*************************************************/ + +int +PRIV(jit_get_size)(void *executable_jit) +{ +#ifndef SUPPORT_JIT +(void)executable_jit; +return 0; +#else /* SUPPORT_JIT */ + +executable_jit = executable_jit; +return 0; /* FIXME */ +#endif +} + /* End of pcre2_jit_misc.c */ diff --git a/src/pcre2_match.c b/src/pcre2_match.c index a76b41e..1047f0a 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -56,6 +56,10 @@ POSSIBILITY OF SUCH DAMAGE. (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ PCRE2_PARTIAL_SOFT|PCRE2_NO_START_OPTIMIZE) + +#define PUBLIC_JIT_MATCH_OPTIONS \ + (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\ + PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD) /* The mb->capture_last field uses the lower 16 bits for the last captured substring (which can never be greater than 65535) and a bit in the top half @@ -6271,11 +6275,11 @@ an unsupported option is set or if JIT returns BADOPTION (which means that the selected normal or partial matching mode was not compiled). */ #ifdef SUPPORT_JIT -if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0 && +if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0 && mcontext->bsr_convention == 0 && mcontext->newline_convention == 0) { - rc = PRIV(jit_exec)(subject, length, start_offset, options, match_data, - mcontext); + rc = pcre2_jit_match(code, subject, length, start_offset, options, + match_data, mcontext, NULL); if (rc != PCRE2_ERROR_JIT_BADOPTION) return rc; } #endif diff --git a/src/pcre2grep.c b/src/pcre2grep.c index 6d4496e..6cefbdd 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -3115,14 +3115,14 @@ for (fn = pattern_files; fn != NULL; fn = fn->next) #ifdef SUPPORT_PCRE2GREP_JIT if (use_jit) - jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024); + jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 1024*1024); #endif for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) { #ifdef SUPPORT_PCRE2GREP_JIT - if (jit_stack != NULL && cp->hint != NULL) - pcre2_assign_jit_stack(cp->hint, NULL, jit_stack); + if (jit_stack != NULL && cp->compiled != NULL) + pcre2_jit_stack_assign(cp->compiled, NULL, jit_stack); #endif } diff --git a/src/pcre2test.c b/src/pcre2test.c index 571928d..99ed684 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -3190,18 +3190,18 @@ if ((pat_patctl.control & CTL_INFO) != 0) /* FIXME: tidy this up */ - if (pat_patctl.jit != 0) + if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) { size_t jitsize; if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize) == 0) { if (jitsize > 0) - fprintf(outfile, "JIT study was successful\n"); + fprintf(outfile, "JIT compilation was successful\n"); else #ifdef SUPPORT_JIT - fprintf(outfile, "JIT study was not successful\n"); + fprintf(outfile, "JIT compilation was not successful\n"); #else - fprintf(outfile, "JIT support is not available in this version of PCRE\n"); + fprintf(outfile, "JIT support is not available in this version of PCRE2\n"); #endif } } @@ -4455,10 +4455,8 @@ show_memory = (dat_datctl.control & CTL_MEMORY) != 0; actually used. If jit_stack == NULL, no stack has yet been assigned. */ #ifdef FIXME -if ((dat_datctl.control & CTL_JITVERIFY) != 0 && - - jit_stack == NULL && extra != NULL) - { PCRE2_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); } +if ((dat_datctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL) + { PCRE2_JIT_STACK_ASSIGN(compiled_code, jit_callback, jit_stack); } #endif diff --git a/testdata/testinput10 b/testdata/testinput10 index d14e222..8158623 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -223,9 +223,6 @@ /\x{100}abc(xyz(?1))/IB,utf -/a\x{1234}b/utf,posix - a\x{1234}b - /\777/I,utf \x{1ff} \777 @@ -359,12 +356,6 @@ /abc/utf,never_utf -/\w/posix - +++\x{c2} - -/\w/ucp,posix - +++\x{c2} - /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf diff --git a/testdata/testinput14 b/testdata/testinput14 new file mode 100644 index 0000000..2d150b4 --- /dev/null +++ b/testdata/testinput14 @@ -0,0 +1,9 @@ +# This test is run only when JIT support is not available. It checks that an +# attempt to use it has the expected behaviour. It also tests things that +# are different without JIT. + +/abc/I,jit + +/a*/I + +# End of testinput14 diff --git a/testdata/testinput15 b/testdata/testinput15 new file mode 100644 index 0000000..47bbb11 --- /dev/null +++ b/testdata/testinput15 @@ -0,0 +1,87 @@ +# This test is run only when JIT support is available. It checks for a +# successful and an unsuccessful JIT compile, and a couple of things that are +# different with JIT. + +/abc/I,jit + +/(?(?C1)(?=a)a)/I,jit + +/(?(?C1)(?=a)a)/I + +/a*/I + +/(?(R)a*(?1)|((?R))b)/jit + aaaabcde + +# Test various compile modes + +#pattern jit,jitverify + +/abcd/ + abcd + xyz + +/abcd/ + abcd + ab\=ps + ab\=ph + xyz + +/abcd/ + abcd + ab\=ps + ab\=ph + xyz + +/abcd/jit=1 + abcd + ab\=ps + ab\=ph + xyz + xyz\=ps + +/abcd/jit=2 + abcd + ab\=ps + ab\=ph + xyz + +/abcd/jit=3 + abcd + ab\=ps + ab\=ph + xyz + +/abcd/jit=4 + abcd + ab\=ps + ab\=ph + xyz + +/abcd/jit=5 + abcd + ab\=ps + ab\=ph + xyz + +/abcd/jit=6 + abcd + ab\=ps + ab\=ph + xyz + +/abcd/jit=7 + abcd + ab\=ps + ab\=ph + xyz + +/abcd/I,jit=2 + +/(*NO_START_OPT)a(*:m)b/mark + a + +/^12345678abcd/m + 12345678abcd + +# End of testinput15 diff --git a/testdata/testinput16 b/testdata/testinput16 new file mode 100644 index 0000000..7240158 --- /dev/null +++ b/testdata/testinput16 @@ -0,0 +1,81 @@ +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface, which is supported only with the 8-bit library. This test should +# not be run with JIT (which is not available for the POSIX interface). + +#forbid_utf +#pattern posix + +/abc/ + abc + *** Failers + +/^abc|def/ + abcdef + abcdef\=notbol + +/.*((abc)$|(def))/ + defabc + defabc\=noteol + +/the quick brown fox/ + the quick brown fox + *** Failers + The Quick Brown Fox + +/the quick brown fox/i + the quick brown fox + The Quick Brown Fox + +/abc.def/ + *** Failers + abc\ndef + +/abc$/ + abc + abc\n + +/(abc)\2/ + +/(abc\1)/ + abc + +/a*(b+)(z)(z)/ + aaaabbbbzzzz + aaaabbbbzzzz\=ovector=0 + aaaabbbbzzzz\=ovector=1 + aaaabbbbzzzz\=ovector=2 + +/ab.cd/ + ab-cd + ab=cd + ** Failers + ab\ncd + +/ab.cd/s + ab-cd + ab=cd + ab\ncd + +/a(b)c/no_auto_capture + abc + +/a(?Pb)c/no_auto_capture + abc + +/a?|b?/ + abc + ** Failers + ddd\=notempty + +/\w+A/ + CDAAAAB + +/\w+A/ungreedy + CDAAAAB + +/\Biss\B/I,aftertext + Mississippi + +/abc/\ + +# End of testdata/testinput16 diff --git a/testdata/testinput17 b/testdata/testinput17 new file mode 100644 index 0000000..a517905 --- /dev/null +++ b/testdata/testinput17 @@ -0,0 +1,17 @@ +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface with UTF/UCP support, which is supported only with the 8-bit +# library. This test should not be run with JIT (which is not available for the +# POSIX interface). + +#pattern posix + +/a\x{1234}b/utf + a\x{1234}b + +/\w/ + +++\x{c2} + +/\w/ucp + +++\x{c2} + +# End of testdata/testinput17 diff --git a/testdata/testinput9 b/testdata/testinput9 index 7ce8547..eeb5edb 100644 --- a/testdata/testinput9 +++ b/testdata/testinput9 @@ -1,86 +1,7 @@ -# This set of tests is run only with the 8-bit library. They do not require -# UTF-8 or Unicode property support. The file starts with all the tests of -# the POSIX interface, because that is supported only with the 8-bit library. +# This set of tests is run only with the 8-bit library. They must not require +# UTF-8 or Unicode property support. */ #forbid_utf -#pattern posix - -/abc/ - abc - *** Failers - -/^abc|def/ - abcdef - abcdef\=notbol - -/.*((abc)$|(def))/ - defabc - defabc\=noteol - -/the quick brown fox/ - the quick brown fox - *** Failers - The Quick Brown Fox - -/the quick brown fox/i - the quick brown fox - The Quick Brown Fox - -/abc.def/ - *** Failers - abc\ndef - -/abc$/ - abc - abc\n - -/(abc)\2/ - -/(abc\1)/ - abc - -/a*(b+)(z)(z)/ - aaaabbbbzzzz - aaaabbbbzzzz\=ovector=0 - aaaabbbbzzzz\=ovector=1 - aaaabbbbzzzz\=ovector=2 - -/ab.cd/ - ab-cd - ab=cd - ** Failers - ab\ncd - -/ab.cd/s - ab-cd - ab=cd - ab\ncd - -/a(b)c/no_auto_capture - abc - -/a(?Pb)c/no_auto_capture - abc - -/a?|b?/ - abc - ** Failers - ddd\=notempty - -/\w+A/ - CDAAAAB - -/\w+A/ungreedy - CDAAAAB - -/\Biss\B/I,aftertext - Mississippi - -/abc/\ - -#pattern -posix - -# End of POSIX tests /a\Cb/ aXb diff --git a/testdata/testoutput10 b/testdata/testoutput10 index f5e273d..213247b 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -715,10 +715,6 @@ First code unit = \xc4 Last code unit = 'z' Subject length lower bound = 7 -/a\x{1234}b/utf,posix - a\x{1234}b - 0: a\x{1234}b - /\777/I,utf Capturing subpattern count = 0 Options: utf @@ -1136,14 +1132,6 @@ Failed: error 174 at offset 7: using UTF is disabled by the application /abc/utf,never_utf Failed: error 174 at offset 0: using UTF is disabled by the application -/\w/posix - +++\x{c2} -No match: POSIX code 17: match failed - -/\w/ucp,posix - +++\x{c2} - 0: \xc2 - /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf ------------------------------------------------------------------ Bra diff --git a/testdata/testoutput14 b/testdata/testoutput14 new file mode 100644 index 0000000..a2b5af8 --- /dev/null +++ b/testdata/testoutput14 @@ -0,0 +1,21 @@ +# This test is run only when JIT support is not available. It checks that an +# attempt to use it has the expected behaviour. It also tests things that +# are different without JIT. + +/abc/I,jit +Capturing subpattern count = 0 +No options +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 +JIT support is not available in this version of PCRE + +/a*/I +Capturing subpattern count = 0 +May match empty string +No options +No first code unit +No last code unit +Subject length lower bound = 0 + +# End of testinput14 diff --git a/testdata/testoutput15 b/testdata/testoutput15 new file mode 100644 index 0000000..630ae04 --- /dev/null +++ b/testdata/testoutput15 @@ -0,0 +1,164 @@ +# This test is run only when JIT support is available. It checks for a +# successful and an unsuccessful JIT compile, and a couple of things that are +# different with JIT. + +/abc/I,jit +Capturing subpattern count = 0 +No options +First code unit = 'a' +Need char = 'c' +Subject length lower bound = 3 +No starting char list +JIT study was successful + +/(?(?C1)(?=a)a)/I,jit +Capturing subpattern count = 0 +May match empty string +No options +No first code unit +No last code unit +Study returned NULL +JIT study was not successful + +/(?(?C1)(?=a)a)/I +Capturing subpattern count = 0 +May match empty string +No options +No first code unit +No last code unit +Subject length lower bound = -1 +No starting char list +JIT study was not successful + +/a*/I +Capturing subpattern count = 0 +May match empty string +No options +No first code unit +No last code unit +Study returned NULL + +/(?(R)a*(?1)|((?R))b)/S+ + aaaabcde +Error -27 (JIT stack limit reached) + +# Test various compile modes + +#pattern jit,jitverify + +/abcd/ + abcd + 0: abcd (JIT) + xyz +No match (JIT) + +/abcd/ + abcd + 0: abcd (JIT) + ab\P +Partial match: ab (JIT) + ab\P\P +Partial match: ab (JIT) + xyz +No match (JIT) + +/abcd/ + abcd + 0: abcd (JIT) + ab\P +Partial match: ab (JIT) + ab\P\P +Partial match: ab (JIT) + xyz +No match (JIT) + +/abcd/jit=1 + abcd + 0: abcd (JIT) + ab\P +Partial match: ab + ab\P\P +Partial match: ab + xyz +No match (JIT) + xyz\P +No match + +/abcd/jit=2 + abcd + 0: abcd + ab\P +Partial match: ab (JIT) + ab\P\P +Partial match: ab + xyz +No match + +/abcd/jit=3 + abcd + 0: abcd (JIT) + ab\P +Partial match: ab (JIT) + ab\P\P +Partial match: ab + xyz +No match (JIT) + +/abcd/jit=4 + abcd + 0: abcd + ab\P +Partial match: ab + ab\P\P +Partial match: ab (JIT) + xyz +No match + +/abcd/jit=5 + abcd + 0: abcd (JIT) + ab\P +Partial match: ab + ab\P\P +Partial match: ab (JIT) + xyz +No match (JIT) + +/abcd/jit=6 + abcd + 0: abcd + ab\P +Partial match: ab (JIT) + ab\P\P +Partial match: ab (JIT) + xyz +No match + +/abcd/jit=7 + abcd + 0: abcd (JIT) + ab\P +Partial match: ab (JIT) + ab\P\P +Partial match: ab (JIT) + xyz +No match (JIT) + +/abcd/I,jit=2 +Capturing subpattern count = 0 +No options +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 +No starting char list +JIT study was successful + +/(*NO_START_OPT)a(*:m)b/mark + a +No match, mark = m (JIT) + +/^12345678abcd/m + 12345678abcd + 0: 12345678abcd (JIT) + +# End of testinput15 diff --git a/testdata/testoutput16 b/testdata/testoutput16 new file mode 100644 index 0000000..1a3de59 --- /dev/null +++ b/testdata/testoutput16 @@ -0,0 +1,128 @@ +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface, which is supported only with the 8-bit library. This test should +# not be run with JIT (which is not available for the POSIX interface). + +#forbid_utf +#pattern posix + +/abc/ + abc + 0: abc + *** Failers +No match: POSIX code 17: match failed + +/^abc|def/ + abcdef + 0: abc + abcdef\=notbol + 0: def + +/.*((abc)$|(def))/ + defabc + 0: defabc + 1: abc + 2: abc + defabc\=noteol + 0: def + 1: def + 3: def + +/the quick brown fox/ + the quick brown fox + 0: the quick brown fox + *** Failers +No match: POSIX code 17: match failed + The Quick Brown Fox +No match: POSIX code 17: match failed + +/the quick brown fox/i + the quick brown fox + 0: the quick brown fox + The Quick Brown Fox + 0: The Quick Brown Fox + +/abc.def/ + *** Failers +No match: POSIX code 17: match failed + abc\ndef +No match: POSIX code 17: match failed + +/abc$/ + abc + 0: abc + abc\n + 0: abc + +/(abc)\2/ +Failed: POSIX code 15: bad back reference at offset 7 + +/(abc\1)/ + abc +No match: POSIX code 17: match failed + +/a*(b+)(z)(z)/ + aaaabbbbzzzz + 0: aaaabbbbzz + 1: bbbb + 2: z + 3: z + aaaabbbbzzzz\=ovector=0 +Matched without capture + aaaabbbbzzzz\=ovector=1 + 0: aaaabbbbzz + aaaabbbbzzzz\=ovector=2 + 0: aaaabbbbzz + 1: bbbb + +/ab.cd/ + ab-cd + 0: ab-cd + ab=cd + 0: ab=cd + ** Failers +No match: POSIX code 17: match failed + ab\ncd +No match: POSIX code 17: match failed + +/ab.cd/s + ab-cd + 0: ab-cd + ab=cd + 0: ab=cd + ab\ncd + 0: ab\x0acd + +/a(b)c/no_auto_capture + abc +Matched with REG_NOSUB + +/a(?Pb)c/no_auto_capture + abc +Matched with REG_NOSUB + +/a?|b?/ + abc + 0: a + ** Failers + 0: + ddd\=notempty +No match: POSIX code 17: match failed + +/\w+A/ + CDAAAAB + 0: CDAAAA + +/\w+A/ungreedy + CDAAAAB + 0: CDA + +/\Biss\B/I,aftertext +** Ignored with POSIX interface: info + Mississippi + 0: iss + 0+ issippi + +/abc/\ +Failed: POSIX code 9: bad escape sequence at offset 4 + +# End of testdata/testinput16 diff --git a/testdata/testoutput17 b/testdata/testoutput17 new file mode 100644 index 0000000..954b4b5 --- /dev/null +++ b/testdata/testoutput17 @@ -0,0 +1,20 @@ +# This set of tests is run only with the 8-bit library. It tests the POSIX +# interface with UTF/UCP support, which is supported only with the 8-bit +# library. This test should not be run with JIT (which is not available for the +# POSIX interface). + +#pattern posix + +/a\x{1234}b/utf + a\x{1234}b + 0: a\x{1234}b + +/\w/ + +++\x{c2} +No match: POSIX code 17: match failed + +/\w/ucp + +++\x{c2} + 0: \xc2 + +# End of testdata/testinput17 diff --git a/testdata/testoutput9 b/testdata/testoutput9 index 5d1460a..2f68a74 100644 --- a/testdata/testoutput9 +++ b/testdata/testoutput9 @@ -1,133 +1,7 @@ -# This set of tests is run only with the 8-bit library. They do not require -# UTF-8 or Unicode property support. The file starts with all the tests of -# the POSIX interface, because that is supported only with the 8-bit library. +# This set of tests is run only with the 8-bit library. They must not require +# UTF-8 or Unicode property support. */ #forbid_utf -#pattern posix - -/abc/ - abc - 0: abc - *** Failers -No match: POSIX code 17: match failed - -/^abc|def/ - abcdef - 0: abc - abcdef\=notbol - 0: def - -/.*((abc)$|(def))/ - defabc - 0: defabc - 1: abc - 2: abc - defabc\=noteol - 0: def - 1: def - 3: def - -/the quick brown fox/ - the quick brown fox - 0: the quick brown fox - *** Failers -No match: POSIX code 17: match failed - The Quick Brown Fox -No match: POSIX code 17: match failed - -/the quick brown fox/i - the quick brown fox - 0: the quick brown fox - The Quick Brown Fox - 0: The Quick Brown Fox - -/abc.def/ - *** Failers -No match: POSIX code 17: match failed - abc\ndef -No match: POSIX code 17: match failed - -/abc$/ - abc - 0: abc - abc\n - 0: abc - -/(abc)\2/ -Failed: POSIX code 15: bad back reference at offset 7 - -/(abc\1)/ - abc -No match: POSIX code 17: match failed - -/a*(b+)(z)(z)/ - aaaabbbbzzzz - 0: aaaabbbbzz - 1: bbbb - 2: z - 3: z - aaaabbbbzzzz\=ovector=0 -Matched without capture - aaaabbbbzzzz\=ovector=1 - 0: aaaabbbbzz - aaaabbbbzzzz\=ovector=2 - 0: aaaabbbbzz - 1: bbbb - -/ab.cd/ - ab-cd - 0: ab-cd - ab=cd - 0: ab=cd - ** Failers -No match: POSIX code 17: match failed - ab\ncd -No match: POSIX code 17: match failed - -/ab.cd/s - ab-cd - 0: ab-cd - ab=cd - 0: ab=cd - ab\ncd - 0: ab\x0acd - -/a(b)c/no_auto_capture - abc -Matched with REG_NOSUB - -/a(?Pb)c/no_auto_capture - abc -Matched with REG_NOSUB - -/a?|b?/ - abc - 0: a - ** Failers - 0: - ddd\=notempty -No match: POSIX code 17: match failed - -/\w+A/ - CDAAAAB - 0: CDAAAA - -/\w+A/ungreedy - CDAAAAB - 0: CDA - -/\Biss\B/I,aftertext -** Ignored with POSIX interface: info - Mississippi - 0: iss - 0+ issippi - -/abc/\ -Failed: POSIX code 9: bad escape sequence at offset 4 - -#pattern -posix - -# End of POSIX tests /a\Cb/ aXb