Ensure ovector[0,1] are both PCRE2_UNSET during callouts.
This commit is contained in:
parent
e8cdae3c5b
commit
106028089e
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2CALLOUT 3 "29 March 2017" "PCRE2 10.30"
|
.TH PCRE2CALLOUT 3 "14 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -191,8 +191,8 @@ callouts such as the example above are obeyed.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
During matching, when PCRE2 reaches a callout point, if an external function is
|
During matching, when PCRE2 reaches a callout point, if an external function is
|
||||||
set in the match context, it is called. This applies to both normal and DFA
|
provided in the match context, it is called. This applies to both normal and
|
||||||
matching. The first argument to the callout function is a pointer to a
|
DFA matching. The first argument to the callout function is a pointer to a
|
||||||
\fBpcre2_callout\fP block. The second argument is the void * callout data that
|
\fBpcre2_callout\fP block. The second argument is the void * callout data that
|
||||||
was supplied when the callout was set up by calling \fBpcre2_set_callout()\fP
|
was supplied when the callout was set up by calling \fBpcre2_set_callout()\fP
|
||||||
(see the
|
(see the
|
||||||
|
@ -261,7 +261,7 @@ The remaining fields in the callout block are the same for both kinds of
|
||||||
callout.
|
callout.
|
||||||
.P
|
.P
|
||||||
The \fIoffset_vector\fP field is a pointer to a vector of capturing offsets
|
The \fIoffset_vector\fP field is a pointer to a vector of capturing offsets
|
||||||
(the "ovector"). You may read certain elements in this vector, but you must not
|
(the "ovector"). You may read the elements in this vector, but you must not
|
||||||
change any of them.
|
change any of them.
|
||||||
.P
|
.P
|
||||||
For calls to \fBpcre2_match()\fP, the \fIoffset_vector\fP field is not (since
|
For calls to \fBpcre2_match()\fP, the \fIoffset_vector\fP field is not (since
|
||||||
|
@ -281,10 +281,11 @@ for example, when the callout in the pattern ((a)(b))(?C2) is taken,
|
||||||
.P
|
.P
|
||||||
The contents of ovector[2] to ovector[<capture_top>*2-1] can be inspected in
|
The contents of ovector[2] to ovector[<capture_top>*2-1] can be inspected in
|
||||||
order to extract substrings that have been matched so far, in the same way as
|
order to extract substrings that have been matched so far, in the same way as
|
||||||
extracting substrings after a match has completed. The values in ovector[0] and
|
extracting substrings after a match has completed. The values in ovector[0] and
|
||||||
ovector[1] are undefined and should not be used in any way. Substrings that
|
ovector[1] are always PCRE2_UNSET because the match is by definition not
|
||||||
have not been captured (but whose numbers are less than \fIcapture_top\fP) have
|
complete. Substrings that have not been captured but whose numbers are less
|
||||||
both of their ovector slots set to PCRE2_UNSET.
|
than \fIcapture_top\fP also have both of their ovector slots set to
|
||||||
|
PCRE2_UNSET.
|
||||||
.P
|
.P
|
||||||
For DFA matching, the \fIoffset_vector\fP field points to the ovector that was
|
For DFA matching, the \fIoffset_vector\fP field points to the ovector that was
|
||||||
passed to the matching function in the match data block, but it holds no useful
|
passed to the matching function in the match data block, but it holds no useful
|
||||||
|
@ -410,6 +411,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 29 March 2017
|
Last updated: 14 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -263,20 +263,14 @@ Returns: the return from the callout
|
||||||
static int
|
static int
|
||||||
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
|
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
|
||||||
{
|
{
|
||||||
|
PCRE2_SIZE save0, save1;
|
||||||
pcre2_callout_block cb;
|
pcre2_callout_block cb;
|
||||||
|
int rc;
|
||||||
|
|
||||||
*lengthptr = (*Fecode == OP_CALLOUT)?
|
*lengthptr = (*Fecode == OP_CALLOUT)?
|
||||||
PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
|
PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
|
||||||
|
|
||||||
if (mb->callout == NULL) return 0; /* No callout function provided */
|
if (mb->callout == NULL) return 0; /* No callout function provided */
|
||||||
|
|
||||||
/* The original matching code (pre 10.30) worked directly with the ovector
|
|
||||||
passed by the user, and this was passed to callouts. Now that the working
|
|
||||||
ovector is in the backtracking frame, it no longer needs to reserve space for
|
|
||||||
the overall match offsets (which would waste space in the frame). For backward
|
|
||||||
compatibility, however, we pass capture_top and offset_vector to the callout as
|
|
||||||
if for the extended ovector. */
|
|
||||||
|
|
||||||
cb.version = 1;
|
cb.version = 1;
|
||||||
cb.capture_top = (uint32_t)Foffset_top/2 + 1;
|
cb.capture_top = (uint32_t)Foffset_top/2 + 1;
|
||||||
cb.capture_last = Fcapture_last;
|
cb.capture_last = Fcapture_last;
|
||||||
|
@ -304,8 +298,22 @@ else /* String callout */
|
||||||
cb.callout_string_length =
|
cb.callout_string_length =
|
||||||
*lengthptr - (1 + 4*LINK_SIZE) - 2;
|
*lengthptr - (1 + 4*LINK_SIZE) - 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The original matching code (pre 10.30) worked directly with the ovector
|
||||||
|
passed by the user, and this was passed to callouts. Now that the working
|
||||||
|
ovector is in the backtracking frame, it no longer needs to reserve space for
|
||||||
|
the overall match offsets (which would waste space in the frame). For backward
|
||||||
|
compatibility, however, we pass capture_top and offset_vector to the callout as
|
||||||
|
if for the extended ovector, and we ensure that the first two slots are unset
|
||||||
|
by preserving and restoring their current contents. */
|
||||||
|
|
||||||
return mb->callout(&cb, mb->callout_data);
|
save0 = Fovector[-2];
|
||||||
|
save1 = Fovector[-1];
|
||||||
|
Fovector[-2] = Fovector[-1] = PCRE2_UNSET;
|
||||||
|
rc = mb->callout(&cb, mb->callout_data);
|
||||||
|
Fovector[-2] = save0;
|
||||||
|
Fovector[-1] = save1;
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue