diff --git a/RunTest b/RunTest index e6b041f..2302e98 100755 --- a/RunTest +++ b/RunTest @@ -64,7 +64,7 @@ title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support" title11="Test 11: Specials for the basic 16-bit and 32-bit libraries" title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support" title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries" -title14="Test 14: Non-JIT limits tests" +title14="Test 14: Non-JIT limits and other non-JIT tests" title15="Test 15: JIT-specific features when JIT is not available" title16="Test 16: JIT-specific features when JIT is available" title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP" diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 8372873..988b169 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "05 October 2014" "PCRE2 10.00" +.TH PCRE2API 3 "10 October 2014" "PCRE2 10.00" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -47,16 +47,12 @@ document for an overview of all the PCRE2 documentation. .rs .sp .nf -.B PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *\fImatch_data\fP); -.sp .B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); .sp .B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP); .sp .B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP); .sp -.B PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *\fImatch_data\fP); -.sp .B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); .fi . @@ -2054,10 +2050,6 @@ had. .nf .B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); .sp -.B PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *\fImatch_data\fP); -.sp -.B PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *\fImatch_data\fP); -.sp .B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); .fi .P @@ -2069,35 +2061,15 @@ pointer to the zero-terminated name, which is within the compiled pattern. Otherwise NULL is returned. A (*MARK) name may be available after a failed match or a partial match, as well as after a successful one. .P -The other three functions yield values that give information about the part of -the subject string that was inspected during a successful match or a partial -match. Their results are undefined after a failed match. They return the -following values, respectively: -.sp -(1) The offset of the leftmost character that was inspected during the match. -This can be earlier than the point at which the match started if the pattern -contains lookbehind assertions or \eb or \eB at the start. -.sp -(2) The offset of the character that follows the rightmost character that was -inspected during the match. This can be after the end of the match if the -pattern contains lookahead assertions. -.sp -(3) The offset of the character at which the successful or partial match -started. This can be different to the value of \fIovector[0]\fP if the pattern -contains the \eK escape sequence. -.P -For example, if the pattern (?<=abc)xx\eKyy(?=def) is matched against the -string "123abcxxyydef123", the resulting offsets are: -.sp - ovector[0] 8 - ovector[1] 10 - leftchar 3 - rightchar 13 - startchar 6 -.sp -The \fBallusedtext\fP modifier in \fBpcre2test\fP can be used to display a -longer string that shows the leftmost and rightmost characters in a match -instead of just the matched string. +The offset of the character at which the successful or partial match started is +returned by \fBpcre2_get_startchar()\fP. This can be different to the value of +\fIovector[0]\fP if the pattern contains the \eK escape sequence. This +information is needed when doing partial matching over multiple data segments +(see the +.\" HREF +\fBpcre2partial\fP +.\" +documentation). . . .\" HTML @@ -2654,6 +2626,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 05 October 2014 +Last updated: 10 October 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 05f2b8f..445eaab 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "05 October 2014" "PCRE 10.00" +.TH PCRE2TEST 1 "10 October 2014" "PCRE 10.00" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -630,7 +630,7 @@ not affect the compilation process. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allusedtext show all consulted text + allusedtext show all consulted text /g global global matching jitverify verify JIT usage mark show mark values @@ -688,7 +688,7 @@ pattern. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allusedtext show all consulted text + allusedtext show all consulted text (non-JIT only) altglobal alternative global matching callout_capture show captures at callout time callout_data= set a value to pass via callouts @@ -724,11 +724,13 @@ requests the same action for captured substrings as well as the main matched substring. In each case the remainder is output on the following line with a plus character following the capture number. .P -The \fBallusedtext\fP modifier requests that all the text that was consulted -during a successful pattern match be shown. This affects the output if there -is a lookbehind at the start of a match, or a lookahead at the end, or if \eK -is used in the pattern. Characters that precede or follow the start and end of -the actual match are indicated in the output by '<' or '>' characters +The \fBallusedtext\fP modifier requests that all the text that was consulted +during a successful pattern match by the interpreter should be shown. This +feature is not supported for JIT matching, and if requested with JIT it is +ignored (with a warning message). Setting this modifier affects the output if +there is a lookbehind at the start of a match, or a lookahead at the end, or if +\eK is used in the pattern. Characters that precede or follow the start and end +of the actual match are indicated in the output by '<' or '>' characters underneath them. Here is an example: .sp /(?<=pqr)abc(?=xyz)/ @@ -1151,6 +1153,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 05 October 2014 +Last updated: 10 October 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 7cd0271..9122010 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -415,11 +415,9 @@ PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \ PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \ pcre2_match_data *, pcre2_match_context *); \ PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \ PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \ -PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *); @@ -525,11 +523,9 @@ pcre2_compile are called by application code. */ #define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_) #define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_) #define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_) -#define pcre2_get_leftchar PCRE2_SUFFIX(pcre2_get_leftchar_) #define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_) #define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_) #define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_) -#define pcre2_get_rightchar PCRE2_SUFFIX(pcre2_get_rightchar_) #define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_) #define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_) #define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_) diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 89bdcc2..272fc89 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -515,10 +515,10 @@ same response. */ /* These macros pack up tests that are used for partial matching, and which appear several times in the code. We set the "hit end" flag if the pointer is -at the end of the subject and also past the start of the subject (i.e. -something has been matched). For hard partial matching, we then return -immediately. The second one is used when we already know we are past the end of -the subject. */ +at the end of the subject and also past the earliest inspected character (i.e. +something has been matched, even if not part of the actual matched string). For +hard partial matching, we then return immediately. The second one is used when +we already know we are past the end of the subject. */ #define CHECK_PARTIAL()\ if (mb->partial != 0 && eptr >= mb->end_subject && \ diff --git a/src/pcre2_match_data.c b/src/pcre2_match_data.c index 3db6cde..f793f39 100644 --- a/src/pcre2_match_data.c +++ b/src/pcre2_match_data.c @@ -94,18 +94,6 @@ if (match_data != NULL) -/************************************************* -* Get left-most code unit in match * -*************************************************/ - -PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION -pcre2_get_leftchar(pcre2_match_data *match_data) -{ -return match_data->leftchar; -} - - - /************************************************* * Get last mark in match * *************************************************/ @@ -142,18 +130,6 @@ return match_data->oveccount; -/************************************************* -* Get right-most code unit in match * -*************************************************/ - -PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION -pcre2_get_rightchar(pcre2_match_data *match_data) -{ -return match_data->rightchar; -} - - - /************************************************* * Get starting code unit in match * *************************************************/ diff --git a/src/pcre2test.c b/src/pcre2test.c index 8ece7d5..20cd57b 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -4381,9 +4381,16 @@ modifiers. */ if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS)) { - printf("** Finding match limits is not relevant for DFA matching: ignored\n"); + fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n"); dat_datctl.control &= ~CTL_FINDLIMITS; } + +if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0 && + FLD(compiled_code, executable_jit) != NULL) + { + fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n"); + dat_datctl.control &= ~CTL_ALLUSEDTEXT; + } /* As pcre2_match_data_create() imposes a minimum of 1 on the ovector count, we must do so too. */ diff --git a/testdata/testinput14 b/testdata/testinput14 index d882b61..69ca2b6 100644 --- a/testdata/testinput14 +++ b/testdata/testinput14 @@ -1,7 +1,11 @@ -# These are tests of the match-limiting features. The results are different for +# These are: +# +# (1) Tests of the match-limiting features. The results are different for # interpretive or JIT matching, so this test should not be run with JIT. The # same tests are run using JIT in test 16. +# (2) Other tests that must not be run with JIT. + /(a+)*zz/I aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits aaaaaaaaaaaaaz\=find_limits @@ -80,5 +84,29 @@ /(?(R)a*(?1)|((?R))b)/ aaaabcde + +# The allusedtext modifier does not work with JIT, which does not maintain +# the leftchar/rightchar data. + +/abc(?=xyz)/allusedtext + abcxyzpqr + abcxyzpqr\=aftertext + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + xyzpqrabcxyzpqr\=aftertext + +/a\b/ + a.\=allusedtext + a\=allusedtext + +/abc\Kxyz/ + abcxyz\=allusedtext + +/abc(?=xyz(*ACCEPT))/ + abcxyz\=allusedtext + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg # End of testinput14 diff --git a/testdata/testinput2 b/testdata/testinput2 index fa12400..3522a16 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3955,27 +3955,6 @@ a random value. /Ix aaaabcde aaaabcde\=ovector=100 -/abc(?=xyz)/allusedtext - abcxyzpqr - abcxyzpqr\=aftertext - -/(?<=pqr)abc(?=xyz)/allusedtext - xyzpqrabcxyzpqr - xyzpqrabcxyzpqr\=aftertext - -/a\b/ - a.\=allusedtext - a\=allusedtext - -/abc\Kxyz/ - abcxyz\=allusedtext - -/abc(?=xyz(*ACCEPT))/ - abcxyz\=allusedtext - -/abc(?=abcde)(?=ab)/allusedtext - abcabcdefg - /a*?b*?/ ab diff --git a/testdata/testinput5 b/testdata/testinput5 index eff10f9..a05f1d7 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1625,7 +1625,4 @@ /\X?abc/utf,no_start_optimize \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 -/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext - \x{100}\x{200}\x{300} - # End of testinput5 diff --git a/testdata/testoutput14 b/testdata/testoutput14 index c75f4ec..b3fc8d3 100644 --- a/testdata/testoutput14 +++ b/testdata/testoutput14 @@ -1,7 +1,11 @@ -# These are tests of the match-limiting features. The results are different for +# These are: +# +# (1) Tests of the match-limiting features. The results are different for # interpretive or JIT matching, so this test should not be run with JIT. The # same tests are run using JIT in test 16. +# (2) Other tests that must not be run with JIT. + /(a+)*zz/I Capturing subpattern count = 1 Starting code units: a z @@ -191,5 +195,48 @@ Failed: error -49: nested recursion at the same subject position /(?(R)a*(?1)|((?R))b)/ aaaabcde Failed: error -49: nested recursion at the same subject position + +# The allusedtext modifier does not work with JIT, which does not maintain +# the leftchar/rightchar data. + +/abc(?=xyz)/allusedtext + abcxyzpqr + 0: abcxyz + >>> + abcxyzpqr\=aftertext + 0: abcxyz + >>> + 0+ xyzpqr + +/(?<=pqr)abc(?=xyz)/allusedtext + xyzpqrabcxyzpqr + 0: pqrabcxyz + <<< >>> + xyzpqrabcxyzpqr\=aftertext + 0: pqrabcxyz + <<< >>> + 0+ xyzpqr + +/a\b/ + a.\=allusedtext + 0: a. + > + a\=allusedtext + 0: a + +/abc\Kxyz/ + abcxyz\=allusedtext + 0: abcxyz + <<< + +/abc(?=xyz(*ACCEPT))/ + abcxyz\=allusedtext + 0: abcxyz + >>> + +/abc(?=abcde)(?=ab)/allusedtext + abcabcdefg + 0: abcabcde + >>>>> # End of testinput14 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 2da7604..d1b74dc 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -13463,46 +13463,6 @@ No match aaaabcde\=ovector=100 0: aaaab -/abc(?=xyz)/allusedtext - abcxyzpqr - 0: abcxyz - >>> - abcxyzpqr\=aftertext - 0: abcxyz - >>> - 0+ xyzpqr - -/(?<=pqr)abc(?=xyz)/allusedtext - xyzpqrabcxyzpqr - 0: pqrabcxyz - <<< >>> - xyzpqrabcxyzpqr\=aftertext - 0: pqrabcxyz - <<< >>> - 0+ xyzpqr - -/a\b/ - a.\=allusedtext - 0: a. - > - a\=allusedtext - 0: a - -/abc\Kxyz/ - abcxyz\=allusedtext - 0: abcxyz - <<< - -/abc(?=xyz(*ACCEPT))/ - abcxyz\=allusedtext - 0: abcxyz - >>> - -/abc(?=abcde)(?=ab)/allusedtext - abcabcdefg - 0: abcabcde - >>>>> - /a*?b*?/ ab 0: diff --git a/testdata/testoutput5 b/testdata/testoutput5 index d0f3bef..438c4d3 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -3991,9 +3991,4 @@ Subject length lower bound = 1 \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 0: A\x{300}abc -/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext - \x{100}\x{200}\x{300} - 0: \x{100}\x{200}\x{300} - <<<<<<< >>>>>>> - # End of testinput5