diff --git a/doc/pcre2callout.3 b/doc/pcre2callout.3 index 6c878d0..a0b635a 100644 --- a/doc/pcre2callout.3 +++ b/doc/pcre2callout.3 @@ -1,4 +1,4 @@ -.TH PCRE2CALLOUT 3 "29 March 2017" "PCRE2 10.30" +.TH PCRE2CALLOUT 3 "14 April 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH SYNOPSIS @@ -191,8 +191,8 @@ callouts such as the example above are obeyed. .rs .sp During matching, when PCRE2 reaches a callout point, if an external function is -set in the match context, it is called. This applies to both normal and DFA -matching. The first argument to the callout function is a pointer to a +provided in the match context, it is called. This applies to both normal and +DFA matching. The first argument to the callout function is a pointer to a \fBpcre2_callout\fP block. The second argument is the void * callout data that was supplied when the callout was set up by calling \fBpcre2_set_callout()\fP (see the @@ -261,7 +261,7 @@ The remaining fields in the callout block are the same for both kinds of callout. .P The \fIoffset_vector\fP field is a pointer to a vector of capturing offsets -(the "ovector"). You may read certain elements in this vector, but you must not +(the "ovector"). You may read the elements in this vector, but you must not change any of them. .P For calls to \fBpcre2_match()\fP, the \fIoffset_vector\fP field is not (since @@ -281,10 +281,11 @@ for example, when the callout in the pattern ((a)(b))(?C2) is taken, .P The contents of ovector[2] to ovector[capture_top*2-1] can be inspected in order to extract substrings that have been matched so far, in the same way as -extracting substrings after a match has completed. The values in ovector[0] and -ovector[1] are undefined and should not be used in any way. Substrings that -have not been captured (but whose numbers are less than \fIcapture_top\fP) have -both of their ovector slots set to PCRE2_UNSET. +extracting substrings after a match has completed. 
The values in ovector[0] and +ovector[1] are always PCRE2_UNSET because the match is by definition not +complete. Substrings that have not been captured but whose numbers are less +than \fIcapture_top\fP also have both of their ovector slots set to +PCRE2_UNSET. .P For DFA matching, the \fIoffset_vector\fP field points to the ovector that was passed to the matching function in the match data block, but it holds no useful @@ -410,6 +411,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 29 March 2017 +Last updated: 14 April 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 60d792d..859e606 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -263,20 +263,14 @@ Returns: the return from the callout static int do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr) { +PCRE2_SIZE save0, save1; pcre2_callout_block cb; +int rc; *lengthptr = (*Fecode == OP_CALLOUT)? PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE); if (mb->callout == NULL) return 0; /* No callout function provided */ - -/* The original matching code (pre 10.40) worked directly with the ovector -passed by the user, and this was passed to callouts. Now that the working -ovector is in the backtracking frame, it no longer needs to reserve space for -the overall match offsets (which would waste space in the frame). For backward -compatibility, however, we pass capture_top and offset_vector to the callout as -if for the extended ovector. */ - cb.version = 1; cb.capture_top = (uint32_t)Foffset_top/2 + 1; cb.capture_last = Fcapture_last; @@ -304,8 +298,22 @@ else /* String callout */ cb.callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2; } + +/* The original matching code (pre 10.30) worked directly with the ovector +passed by the user, and this was passed to callouts. 
Now that the working +ovector is in the backtracking frame, it no longer needs to reserve space for +the overall match offsets (which would waste space in the frame). For backward +compatibility, however, we pass capture_top and offset_vector to the callout as +if for the extended ovector, and we ensure that the first two slots are unset +by preserving and restoring their current contents. */ -return mb->callout(&cb, mb->callout_data); +save0 = Fovector[-2]; +save1 = Fovector[-1]; +Fovector[-2] = Fovector[-1] = PCRE2_UNSET; +rc = mb->callout(&cb, mb->callout_data); +Fovector[-2] = save0; +Fovector[-1] = save1; +return rc; }