Remove leftchar/rightchar from the public API.

2014-10-10 11:55:28 +00:00 · 2014-10-10 11:55:28 +00:00 · 62d728bb0b
commit 62d728bb0b
parent 0907fc6e92
13 changed files with 112 additions and 153 deletions
--- a/2
+++ b/2
@ -64,7 +64,7 @@ title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support"
 title11="Test 11: Specials for the basic 16-bit and 32-bit libraries"
 title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support"
 title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries"
-title14="Test 14: Non-JIT limits tests"
+title14="Test 14: Non-JIT limits and other non-JIT tests"
 title15="Test 15: JIT-specific features when JIT is not available"
 title16="Test 16: JIT-specific features when JIT is available"
 title17="Test 17: Tests of the POSIX interface, excluding UTF/UCP"
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@ -1,4 +1,4 @@
-.TH PCRE2API 3 "05 October 2014" "PCRE2 10.00"
+.TH PCRE2API 3 "10 October 2014" "PCRE2 10.00"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@ -47,16 +47,12 @@ document for an overview of all the PCRE2 documentation.
 .rs
 .sp
 .nf
-.B PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *\fImatch_data\fP);
-.sp
 .B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP);
 .sp
 .B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP);
 .sp
 .B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP);
 .sp
-.B PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *\fImatch_data\fP);
-.sp
 .B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP);
 .fi
 .
@ -2054,10 +2050,6 @@ had.
 .nf
 .B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP);
 .sp
-.B PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *\fImatch_data\fP);
-.sp
-.B PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *\fImatch_data\fP);
-.sp
 .B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP);
 .fi
 .P
@ -2069,35 +2061,15 @@ pointer to the zero-terminated name, which is within the compiled pattern.
 Otherwise NULL is returned. A (*MARK) name may be available after a failed 
 match or a partial match, as well as after a successful one.
 .P
-The other three functions yield values that give information about the part of 
-the subject string that was inspected during a successful match or a partial 
-match. Their results are undefined after a failed match. They return the 
-following values, respectively:
-.sp
-(1) The offset of the leftmost character that was inspected during the match.
-This can be earlier than the point at which the match started if the pattern
-contains lookbehind assertions or \eb or \eB at the start.
-.sp
-(2) The offset of the character that follows the rightmost character that was
-inspected during the match. This can be after the end of the match if the 
-pattern contains lookahead assertions.
-.sp
-(3) The offset of the character at which the successful or partial match 
-started. This can be different to the value of \fIovector[0]\fP if the pattern 
-contains the \eK escape sequence.
-.P
-For example, if the pattern (?<=abc)xx\eKyy(?=def) is matched against the
-string "123abcxxyydef123", the resulting offsets are:
-.sp
-  ovector[0]   8
-  ovector[1]  10
-  leftchar     3
-  rightchar   13
-  startchar    6
-.sp
-The \fBallusedtext\fP modifier in \fBpcre2test\fP can be used to display a
-longer string that shows the leftmost and rightmost characters in a match
-instead of just the matched string.
+The offset of the character at which the successful or partial match started is
+returned by \fBpcre2_get_startchar()\fP; its value is undefined after a failed
+match. This offset can be different to the value of \fIovector[0]\fP if the
+pattern contains the \eK escape sequence. It is needed when doing partial
+matching over multiple data segments (see the
+.\" HREF
+\fBpcre2partial\fP
+.\"
+documentation).
 .
 .
 .\" HTML <a name="errorlist"></a>
@ -2654,6 +2626,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 05 October 2014
+Last updated: 10 October 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "05 October 2014" "PCRE 10.00"
+.TH PCRE2TEST 1 "10 October 2014" "PCRE2 10.00"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@ -630,7 +630,7 @@ not affect the compilation process.
      aftertext                 show text after match
      allaftertext              show text after captures
      allcaptures               show all captures
-      allusedtext               show all consulted text 
+      allusedtext               show all consulted text
  /g  global                    global matching
      jitverify                 verify JIT usage
      mark                      show mark values
@ -688,7 +688,7 @@ pattern.
      aftertext                 show text after match
      allaftertext              show text after captures
      allcaptures               show all captures
-      allusedtext               show all consulted text 
+      allusedtext               show all consulted text (non-JIT only)
      altglobal                 alternative global matching
      callout_capture           show captures at callout time
      callout_data=<n>          set a value to pass via callouts
@ -724,11 +724,13 @@ requests the same action for captured substrings as well as the main matched
 substring. In each case the remainder is output on the following line with a
 plus character following the capture number.
 .P
-The \fBallusedtext\fP modifier requests that all the text that was consulted 
-during a successful pattern match be shown. This affects the output if there 
-is a lookbehind at the start of a match, or a lookahead at the end, or if \eK 
-is used in the pattern. Characters that precede or follow the start and end of 
-the actual match are indicated in the output by '<' or '>' characters 
+The \fBallusedtext\fP modifier requests that all the text that was consulted
+during a successful pattern match by the interpreter should be shown. This
+feature is not supported for JIT matching, and if requested with JIT it is
+ignored (with a warning message). Setting this modifier affects the output if
+there is a lookbehind at the start of a match, or a lookahead at the end, or if
+\eK is used in the pattern. Characters that precede or follow the start and end
+of the actual match are indicated in the output by '<' or '>' characters
 underneath them. Here is an example:
 .sp
  /(?<=pqr)abc(?=xyz)/
@ -1151,6 +1153,6 @@ Cambridge CB2 3QH, England.
 .rs
 .sp
 .nf
-Last updated: 05 October 2014
+Last updated: 10 October 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@ -415,11 +415,9 @@ PCRE2_EXP_DECL int         pcre2_match(const pcre2_code *, \
                             PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
                             pcre2_match_data *, pcre2_match_context *); \
 PCRE2_EXP_DECL void        pcre2_match_data_free(pcre2_match_data *); \
-PCRE2_EXP_DECL PCRE2_SIZE  pcre2_get_leftchar(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SPTR  pcre2_get_mark(pcre2_match_data *); \
 PCRE2_EXP_DECL uint32_t    pcre2_get_ovector_count(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
-PCRE2_EXP_DECL PCRE2_SIZE  pcre2_get_rightchar(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SIZE  pcre2_get_startchar(pcre2_match_data *);


@ -525,11 +523,9 @@ pcre2_compile are called by application code. */
 #define pcre2_general_context_create          PCRE2_SUFFIX(pcre2_general_context_create_)
 #define pcre2_general_context_free            PCRE2_SUFFIX(pcre2_general_context_free_)
 #define pcre2_get_error_message               PCRE2_SUFFIX(pcre2_get_error_message_)
-#define pcre2_get_leftchar                    PCRE2_SUFFIX(pcre2_get_leftchar_)
 #define pcre2_get_mark                        PCRE2_SUFFIX(pcre2_get_mark_)
 #define pcre2_get_ovector_pointer             PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
 #define pcre2_get_ovector_count               PCRE2_SUFFIX(pcre2_get_ovector_count_)
-#define pcre2_get_rightchar                   PCRE2_SUFFIX(pcre2_get_rightchar_)
 #define pcre2_get_startchar                   PCRE2_SUFFIX(pcre2_get_startchar_)
 #define pcre2_jit_compile                     PCRE2_SUFFIX(pcre2_jit_compile_)
 #define pcre2_jit_match                       PCRE2_SUFFIX(pcre2_jit_match_)
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@ -515,10 +515,10 @@ same response. */

 /* These macros pack up tests that are used for partial matching, and which
 appear several times in the code. We set the "hit end" flag if the pointer is
-at the end of the subject and also past the start of the subject (i.e.
-something has been matched). For hard partial matching, we then return
-immediately. The second one is used when we already know we are past the end of
-the subject. */
+at the end of the subject and also past the earliest inspected character (i.e.
+something has been matched, even if not part of the actual matched string). For
+hard partial matching, we then return immediately. The second one is used when
+we already know we are past the end of the subject. */

 #define CHECK_PARTIAL()\
  if (mb->partial != 0 && eptr >= mb->end_subject && \
--- a/src/pcre2_match_data.c
+++ b/src/pcre2_match_data.c
@ -94,18 +94,6 @@ if (match_data != NULL)



-/*************************************************
-*         Get left-most code unit in match       *
-*************************************************/
-
-PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
-pcre2_get_leftchar(pcre2_match_data *match_data)
-{
-return match_data->leftchar;
-}
-
-
-
 /*************************************************
 *         Get last mark in match                 *
 *************************************************/
@ -142,18 +130,6 @@ return match_data->oveccount;



-/*************************************************
-*         Get right-most code unit in match      *
-*************************************************/
-
-PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
-pcre2_get_rightchar(pcre2_match_data *match_data)
-{
-return match_data->rightchar;
-}
-
-
-
 /*************************************************
 *         Get starting code unit in match        *
 *************************************************/
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@ -4381,9 +4381,16 @@ modifiers. */

 if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
  {
-  printf("** Finding match limits is not relevant for DFA matching: ignored\n");
+  fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n");
  dat_datctl.control &= ~CTL_FINDLIMITS;
  }
+  
+if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0 && 
+    FLD(compiled_code, executable_jit) != NULL)
+  {
+  fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
+  dat_datctl.control &= ~CTL_ALLUSEDTEXT;  
+  }  

 /* As pcre2_match_data_create() imposes a minimum of 1 on the ovector count, we
 must do so too. */
--- a/testdata/testinput14
+++ b/testdata/testinput14
@ -1,7 +1,11 @@
-# These are tests of the match-limiting features. The results are different for 
+# These are:
+#
+# (1) Tests of the match-limiting features. The results are different for
 # interpretive or JIT matching, so this test should not be run with JIT. The
 # same tests are run using JIT in test 16.

+# (2) Other tests that must not be run with JIT.
+
 /(a+)*zz/I
  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
  aaaaaaaaaaaaaz\=find_limits
@ -80,5 +84,29 @@

 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
+    
+# The allusedtext modifier does not work with JIT, which does not maintain
+# the leftchar/rightchar data.
+
+/abc(?=xyz)/allusedtext
+    abcxyzpqr
+    abcxyzpqr\=aftertext
+    
+/(?<=pqr)abc(?=xyz)/allusedtext
+    xyzpqrabcxyzpqr
+    xyzpqrabcxyzpqr\=aftertext
+    
+/a\b/
+    a.\=allusedtext
+    a\=allusedtext  
+
+/abc\Kxyz/
+    abcxyz\=allusedtext
+
+/abc(?=xyz(*ACCEPT))/
+    abcxyz\=allusedtext
+
+/abc(?=abcde)(?=ab)/allusedtext
+    abcabcdefg

 # End of testinput14
--- a/testdata/testinput2
+++ b/testdata/testinput2
@ -3955,27 +3955,6 @@ a random value. /Ix
    aaaabcde
    aaaabcde\=ovector=100

-/abc(?=xyz)/allusedtext
-    abcxyzpqr
-    abcxyzpqr\=aftertext
-    
-/(?<=pqr)abc(?=xyz)/allusedtext
-    xyzpqrabcxyzpqr
-    xyzpqrabcxyzpqr\=aftertext
-    
-/a\b/
-    a.\=allusedtext
-    a\=allusedtext  
-
-/abc\Kxyz/
-    abcxyz\=allusedtext
-
-/abc(?=xyz(*ACCEPT))/
-    abcxyz\=allusedtext
-
-/abc(?=abcde)(?=ab)/allusedtext
-    abcabcdefg
-
 /a*?b*?/
    ab

--- a/testdata/testinput5
+++ b/testdata/testinput5
@ -1625,7 +1625,4 @@
 /\X?abc/utf,no_start_optimize
 \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06

-/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
-    \x{100}\x{200}\x{300}
-
 # End of testinput5 
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@ -1,7 +1,11 @@
-# These are tests of the match-limiting features. The results are different for 
+# These are:
+#
+# (1) Tests of the match-limiting features. The results are different for
 # interpretive or JIT matching, so this test should not be run with JIT. The
 # same tests are run using JIT in test 16.

+# (2) Other tests that must not be run with JIT.
+
 /(a+)*zz/I
 Capturing subpattern count = 1
 Starting code units: a z 
@ -191,5 +195,48 @@ Failed: error -49: nested recursion at the same subject position
 /(?(R)a*(?1)|((?R))b)/
    aaaabcde
 Failed: error -49: nested recursion at the same subject position
+    
+# The allusedtext modifier does not work with JIT, which does not maintain
+# the leftchar/rightchar data.
+
+/abc(?=xyz)/allusedtext
+    abcxyzpqr
+ 0: abcxyz
+       >>>
+    abcxyzpqr\=aftertext
+ 0: abcxyz
+       >>>
+ 0+ xyzpqr
+    
+/(?<=pqr)abc(?=xyz)/allusedtext
+    xyzpqrabcxyzpqr
+ 0: pqrabcxyz
+    <<<   >>>
+    xyzpqrabcxyzpqr\=aftertext
+ 0: pqrabcxyz
+    <<<   >>>
+ 0+ xyzpqr
+    
+/a\b/
+    a.\=allusedtext
+ 0: a.
+     >
+    a\=allusedtext  
+ 0: a
+
+/abc\Kxyz/
+    abcxyz\=allusedtext
+ 0: abcxyz
+    <<<   
+
+/abc(?=xyz(*ACCEPT))/
+    abcxyz\=allusedtext
+ 0: abcxyz
+       >>>
+
+/abc(?=abcde)(?=ab)/allusedtext
+    abcabcdefg
+ 0: abcabcde
+       >>>>>

 # End of testinput14
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@ -13463,46 +13463,6 @@ No match
    aaaabcde\=ovector=100
 0: aaaab

-/abc(?=xyz)/allusedtext
-    abcxyzpqr
- 0: abcxyz
-       >>>
-    abcxyzpqr\=aftertext
- 0: abcxyz
-       >>>
- 0+ xyzpqr
-    
-/(?<=pqr)abc(?=xyz)/allusedtext
-    xyzpqrabcxyzpqr
- 0: pqrabcxyz
-    <<<   >>>
-    xyzpqrabcxyzpqr\=aftertext
- 0: pqrabcxyz
-    <<<   >>>
- 0+ xyzpqr
-    
-/a\b/
-    a.\=allusedtext
- 0: a.
-     >
-    a\=allusedtext  
- 0: a
-
-/abc\Kxyz/
-    abcxyz\=allusedtext
- 0: abcxyz
-    <<<   
-
-/abc(?=xyz(*ACCEPT))/
-    abcxyz\=allusedtext
- 0: abcxyz
-       >>>
-
-/abc(?=abcde)(?=ab)/allusedtext
-    abcabcdefg
- 0: abcabcde
-       >>>>>
-
 /a*?b*?/
    ab
 0: 
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@ -3991,9 +3991,4 @@ Subject length lower bound = 1
 \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
 0: A\x{300}abc

-/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
-    \x{100}\x{200}\x{300}
- 0: \x{100}\x{200}\x{300}
-    <<<<<<<       >>>>>>>
-
 # End of testinput5