From 2caf22dc61c7baf06c9f3a8d9980a62b3d86f8a7 Mon Sep 17 00:00:00 2001
From: "Philip.Hazel" <Philip.Hazel@gmail.com>
Date: Tue, 3 Nov 2015 17:38:00 +0000
Subject: [PATCH] Forbid \K patterns that end before they start in
 pcre2_substitute().

---
 ChangeLog              |  3 +++
 doc/pcre2api.3         | 13 ++++++++-----
 src/pcre2.h            |  1 +
 src/pcre2.h.in         |  1 +
 src/pcre2_error.c      |  8 +++++---
 src/pcre2_substitute.c | 41 ++++++++++++++++++++++++-----------------
 testdata/testinput2    |  3 +++
 testdata/testoutput2   |  4 ++++
 8 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 82a77d7..c43d02c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -257,6 +257,9 @@ as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
 
 74. Give an error if a lookbehind assertion is longer than 65535 code units.
 
+75. Give an error in pcre2_substitute() if a match ends before it starts (as a
+result of the use of \K).
+
 
 Version 10.20 30-June-2015
 --------------------------
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
index dc6daeb..50d9606 100644
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -2666,7 +2666,9 @@ same number causes an error at compile time.
 This function calls \fBpcre2_match()\fP and then makes a copy of the subject
 string in \fIoutputbuffer\fP, replacing the part that was matched with the
 \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
-be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
+be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
+which a \eK item in a lookahead in the pattern causes the match to end before
+it starts are not supported, and give rise to an error return.
 .P
 The first seven arguments of \fBpcre2_substitute()\fP are the same as for
 \fBpcre2_match()\fP, except that the partial matching options are not
@@ -2769,8 +2771,9 @@ are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
 is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
 errors in the replacement string, with more particular errors being
 PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
-PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
-PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all
+PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket not found),
+PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution), and
+PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started). As for all
 PCRE2 errors, a text message that describes the error can be obtained by
 calling \fBpcre2_get_error_message()\fP.
 .
@@ -3066,6 +3069,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 16 October 2015
+Last updated: 03 November 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi
diff --git a/src/pcre2.h b/src/pcre2.h
index 8bc0345..e5425c0 100644
--- a/src/pcre2.h
+++ b/src/pcre2.h
@@ -240,6 +240,7 @@ numbers must not be changed. */
 #define PCRE2_ERROR_BADREPESCAPE      (-57)
 #define PCRE2_ERROR_REPMISSINGBRACE   (-58)
 #define PCRE2_ERROR_BADSUBSTITUTION   (-59)
+#define PCRE2_ERROR_BADSUBSPATTERN    (-60)
 
 /* Request types for pcre2_pattern_info() */
 
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index fa559ad..d77994d 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -240,6 +240,7 @@ numbers must not be changed. */
 #define PCRE2_ERROR_BADREPESCAPE      (-57)
 #define PCRE2_ERROR_REPMISSINGBRACE   (-58)
 #define PCRE2_ERROR_BADSUBSTITUTION   (-59)
+#define PCRE2_ERROR_BADSUBSPATTERN    (-60)
 
 /* Request types for pcre2_pattern_info() */
 
diff --git a/src/pcre2_error.c b/src/pcre2_error.c
index 2c1caaa..c8e7afb 100644
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@@ -170,8 +170,8 @@ static const char compile_error_texts[] =
   "(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
   /* 85 */
   "using \\C is disabled in this PCRE2 library\0"
-  "regular expression is too complicated\0" 
-  "lookbehind assertion is too long\0" 
+  "regular expression is too complicated\0"
+  "lookbehind assertion is too long\0"
   ;
 
 /* Match-time and UTF error texts are in the same format. */
@@ -247,7 +247,9 @@ static const char match_error_texts[] =
   "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
   "bad escape sequence in replacement string\0"
   "expected closing curly bracket in replacement string\0"
-  "bad substitution in replacement string\0" 
+  "bad substitution in replacement string\0"
+  /* 60 */
+  "match with end before start is not supported\0"
   ;
 
 
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 1c60381..b861ba5 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -55,7 +55,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* In extended mode, we recognize ${name:+set text:unset text} and similar
 constructions. This requires the identification of unescaped : and }
 characters. This function scans for such. It must deal with nested ${
-constructions. The pointer to the text is updated, either to the required end 
+constructions. The pointer to the text is updated, either to the required end
 character, or to where an error was detected.
 
 Arguments:
@@ -107,7 +107,7 @@ for (; ptr < ptrend; ptr++)
 
   else if (*ptr == CHAR_BACKSLASH)
     {
-    int erc; 
+    int erc;
     int errorcode = 0;
     uint32_t ch;
 
@@ -279,10 +279,10 @@ do
 
   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
     match_data, mcontext);
-    
+
 #ifdef SUPPORT_UNICODE
   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
-#endif   
+#endif
 
   /* Any error other than no match returns the error code. No match when not
   doing the special after-empty-match global rematch, or when at the end of the
@@ -320,7 +320,14 @@ do
     continue;
     }
 
-  /* Handle a successful match. */
+  /* Handle a successful match. Matches that use \K to end before they start
+  are not supported. */
+
+  if (ovector[1] < ovector[0])
+    {
+    rc = PCRE2_ERROR_BADSUBSPATTERN;
+    goto EXIT;
+    }
 
   subs++;
   if (rc == 0) rc = ovector_count;
@@ -409,14 +416,14 @@ do
           next = *ptr;
           if (next < CHAR_0 || next > CHAR_9) break;
           group = group * 10 + next - CHAR_0;
-          
+
           /* A check for a number greater than the hightest captured group
           is sufficient here; no need for a separate overflow check. */
-            
+
           if (group > code->top_bracket)
             {
             rc = PCRE2_ERROR_NOSUBSTRING;
-            goto PTREXIT;   
+            goto PTREXIT;
             }
           }
         }
@@ -439,7 +446,7 @@ do
 
       if (inparens)
         {
-        
+
         if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
           {
           special = *(++ptr);
@@ -501,8 +508,8 @@ do
       else
         {
         PCRE2_SPTR subptr, subptrend;
-        
-        /* Find a number for a named group. In case there are duplicate names, 
+
+        /* Find a number for a named group. In case there are duplicate names,
         search for the first one that is set. */
 
         if (group < 0)
@@ -516,18 +523,18 @@ do
             if (ng < ovector_count)
               {
               if (group < 0) group = ng;          /* First in ovector */
-              if (ovector[ng*2] != PCRE2_UNSET) 
+              if (ovector[ng*2] != PCRE2_UNSET)
                 {
                 group = ng;                       /* First that is set */
                 break;
-                } 
+                }
               }
             }
-            
-          /* If group is still negative, it means we did not find a group that 
+
+          /* If group is still negative, it means we did not find a group that
           is in the ovector. Just set the first group. */
-          
-          if (group < 0) group = GET2(first, 0); 
+
+          if (group < 0) group = GET2(first, 0);
           }
 
         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
diff --git a/testdata/testinput2 b/testdata/testinput2
index 426b5bf..5142943 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4596,4 +4596,7 @@ B)x/alt_verbnames,mark
 
 /(?<!a{65535})x/I
 
+/(?=a\K)/replace=z
+    BaCaD
+
 # End of testinput2 
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 476184a..911ebc8 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14690,4 +14690,8 @@ Max lookbehind = 65535
 First code unit = 'x'
 Subject length lower bound = 1
 
+/(?=a\K)/replace=z
+    BaCaD
+Failed: error -60: match with end before start is not supported
+
 # End of testinput2