Impose a minimum of 1 for the number of pairs in the ovector.
This commit is contained in:
parent
4ca4ad688d
commit
4bdfd990af
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "01 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2API 3 "05 October 2014" "PCRE2 10.00"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1650,13 +1650,15 @@ particular, the match data block contains a vector of offsets into the subject
|
|||
string that define the matched part of the subject and any substrings that were
|
||||
captured. This is known as the \fIovector\fP.
|
||||
.P
|
||||
Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a
|
||||
Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a
|
||||
match data block by calling one of the creation functions above. For
|
||||
\fBpcre2_match_data_create()\fP, the first argument is the number of pairs of
|
||||
offsets in the \fIovector\fP. One pair of offsets is required to identify the
|
||||
string that matched the whole pattern, with another pair for each captured
|
||||
substring. For example, a value of 4 creates enough space to record the
|
||||
matched portion of the subject plus three captured substrings.
|
||||
substring. For example, a value of 4 creates enough space to record the matched
|
||||
portion of the subject plus three captured substrings. A minimum of 1
|
||||
pair is imposed by \fBpcre2_match_data_create()\fP, so it is always possible to
|
||||
return the overall matched string.
|
||||
.P
|
||||
For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a
|
||||
pointer to a compiled pattern. In this case the ovector is created to be
|
||||
|
@ -2015,13 +2017,13 @@ operation, it is the last portion of the string that it matched that is
|
|||
returned.
|
||||
.P
|
||||
If the ovector is too small to hold all the captured substring offsets, as much
|
||||
as possible is filled in, and the function returns a value of zero. If neither
|
||||
the actual string matched nor any captured substrings are of interest,
|
||||
\fBpcre2_match()\fP may be called with a match data block whose ovector is of
|
||||
zero length. However, if the pattern contains back references and the
|
||||
\fIovector\fP is not big enough to remember the related substrings, PCRE2 has
|
||||
to get additional memory for use during matching. Thus it is usually advisable
|
||||
to set up a match data block containing an ovector of reasonable size.
|
||||
as possible is filled in, and the function returns a value of zero. If captured
|
||||
substrings are not of interest, \fBpcre2_match()\fP may be called with a match
|
||||
data block whose ovector is of minimum length (that is, one pair). However, if
|
||||
the pattern contains back references and the \fIovector\fP is not big enough to
|
||||
remember the related substrings, PCRE2 has to get additional memory for use
|
||||
during matching. Thus it is usually advisable to set up a match data block
|
||||
containing an ovector of reasonable size.
|
||||
.P
|
||||
It is possible for capturing subpattern number \fIn+1\fP to match some part of
|
||||
the subject when subpattern \fIn\fP has not been used at all. For example, if
|
||||
|
@ -2652,6 +2654,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 01 October 2014
|
||||
Last updated: 05 October 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "19 August 2014" "PCRE 10.00"
|
||||
.TH PCRE2TEST 1 "05 October 2014" "PCRE 10.00"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -881,6 +881,12 @@ The \fBovector\fP modifier applies only to the subject line in which it
|
|||
appears, though of course it can also be used to set a default in a
|
||||
\fB#subject\fP command. It specifies the number of pairs of offsets that are
|
||||
available for storing matching information. The default is 15.
|
||||
.P
|
||||
At least one pair of offsets is always created by
|
||||
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
|
||||
value of 0 is the same as 1. However, a value of 0 is useful when testing the
|
||||
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
|
||||
vector.
|
||||
.
|
||||
.
|
||||
.SH "THE ALTERNATIVE MATCHING FUNCTION"
|
||||
|
@ -1145,6 +1151,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 19 August 2014
|
||||
Last updated: 05 October 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -51,10 +51,14 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
* Create a match data block given ovector size *
|
||||
*************************************************/
|
||||
|
||||
/* A minimum of 1 is imposed on the number of ovector triplets. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_match_data *yield = PRIV(memctl_malloc)(
|
||||
pcre2_match_data *yield;
|
||||
if (oveccount < 1) oveccount = 1;
|
||||
yield = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE),
|
||||
(pcre2_memctl *)gcontext);
|
||||
yield->oveccount = oveccount;
|
||||
|
|
|
@ -2531,7 +2531,7 @@ switch (m->which)
|
|||
case MOD_CTC: /* Compile context modifier */
|
||||
if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
|
||||
else if (ctx == CTX_PAT) field = PTR(pat_context);
|
||||
break;
|
||||
break;
|
||||
|
||||
case MOD_CTM: /* Match context modifier */
|
||||
if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
|
||||
|
@ -3705,8 +3705,8 @@ if (TEST(compiled_code, ==, NULL))
|
|||
/* Call the JIT compiler if requested. */
|
||||
|
||||
if (pat_patctl.jit != 0)
|
||||
{
|
||||
PCRE2_JIT_COMPILE(compiled_code, pat_patctl.jit);
|
||||
{
|
||||
PCRE2_JIT_COMPILE(compiled_code, pat_patctl.jit);
|
||||
}
|
||||
|
||||
/* Output code size and other information if requested. */
|
||||
|
@ -4385,11 +4385,10 @@ if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
|
|||
dat_datctl.control &= ~CTL_FINDLIMITS;
|
||||
}
|
||||
|
||||
if ((dat_datctl.control & CTL_ANYGLOB) != 0 && dat_datctl.oveccount < 1)
|
||||
{
|
||||
printf("** Global matching requires a non-zero ovector count: ignored\n");
|
||||
dat_datctl.control &= ~CTL_ANYGLOB;
|
||||
}
|
||||
/* As pcre2_match_data_create() imposes a minimum of 1 on the ovector count, we
|
||||
must do so too. */
|
||||
|
||||
if (dat_datctl.oveccount < 1) dat_datctl.oveccount = 1;
|
||||
|
||||
/* Enable display of malloc/free if wanted. */
|
||||
|
||||
|
@ -4438,28 +4437,28 @@ else
|
|||
PCRE2_MATCH_DATA_FREE(match_data);
|
||||
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
|
||||
}
|
||||
|
||||
|
||||
/* Loop for global matching */
|
||||
|
||||
for (gmatched = 0;; gmatched++)
|
||||
{
|
||||
int capcount;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE ovecsave[2];
|
||||
PCRE2_SIZE ovecsave[2];
|
||||
|
||||
jit_was_used = FALSE;
|
||||
ovector = FLD(match_data, ovector);
|
||||
|
||||
|
||||
/* After the first time round a global loop, save the current ovector[0,1] so
|
||||
that we can check that they do change each time. Otherwise a matching bug
|
||||
that we can check that they do change each time. Otherwise a matching bug
|
||||
that returns the same string causes an infinite loop. It has happened! */
|
||||
|
||||
if (gmatched > 0)
|
||||
{
|
||||
{
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
}
|
||||
|
||||
ovecsave[1] = ovector[1];
|
||||
}
|
||||
|
||||
/* Do timing if required. */
|
||||
|
||||
if (timeitm > 0)
|
||||
|
@ -4564,7 +4563,7 @@ for (gmatched = 0;; gmatched++)
|
|||
PCRE2_SIZE rightchar = FLD(match_data, rightchar);
|
||||
|
||||
/* This is a check against a lunatic return value. */
|
||||
|
||||
|
||||
if (capcount > (int)dat_datctl.oveccount)
|
||||
{
|
||||
fprintf(outfile,
|
||||
|
@ -4577,20 +4576,20 @@ for (gmatched = 0;; gmatched++)
|
|||
dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
|
||||
}
|
||||
}
|
||||
|
||||
/* If this is not the first time round a global loop, check that the
|
||||
returned string has changed. If not, there is a bug somewhere and we must
|
||||
|
||||
/* If this is not the first time round a global loop, check that the
|
||||
returned string has changed. If not, there is a bug somewhere and we must
|
||||
break the loop because it will go on for ever. We know that for a global
|
||||
match there must be at least two elements in the ovector. This is checked
|
||||
match there must be at least two elements in the ovector. This is checked
|
||||
above. */
|
||||
|
||||
|
||||
if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
|
||||
{
|
||||
fprintf(outfile,
|
||||
fprintf(outfile,
|
||||
"** PCRE2 error: global repeat returned the same string as previous\n");
|
||||
fprintf(outfile, "** Global loop abandoned\n");
|
||||
dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */
|
||||
}
|
||||
}
|
||||
|
||||
/* "allcaptures" requests showing of all captures in the pattern, to check
|
||||
unset ones at the end. It may be set on the pattern or the data. Implement
|
||||
|
@ -4647,7 +4646,7 @@ for (gmatched = 0;; gmatched++)
|
|||
PCHARSV(pp, start, end - start, utf, outfile);
|
||||
}
|
||||
|
||||
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
|
||||
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
|
||||
fprintf(outfile, " (JIT)");
|
||||
fprintf(outfile, "\n");
|
||||
|
||||
|
@ -4864,7 +4863,7 @@ for (gmatched = 0;; gmatched++)
|
|||
|
||||
fprintf(outfile, ": ");
|
||||
PCHARSV(pp, leftchar, ulen - leftchar, utf, outfile);
|
||||
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
|
||||
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
|
||||
fprintf(outfile, " (JIT)");
|
||||
fprintf(outfile, "\n");
|
||||
break; /* Out of the /g loop */
|
||||
|
@ -4875,8 +4874,7 @@ for (gmatched = 0;; gmatched++)
|
|||
If that is the case, this is not necessarily the end. We want to advance the
|
||||
start offset, and continue. We won't be at the end of the string - that was
|
||||
checked before setting g_notempty. We achieve the effect by pretending that a
|
||||
single character was matched. We know that match_data->oveccount is at least
|
||||
1 because that was checked above.
|
||||
single character was matched.
|
||||
|
||||
Complication arises in the case when the newline convention is "any", "crlf",
|
||||
or "anycrlf". If the previous match was at the end of a line terminated by
|
||||
|
@ -4936,7 +4934,7 @@ for (gmatched = 0;; gmatched++)
|
|||
fprintf(outfile, ", mark = ");
|
||||
PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
|
||||
}
|
||||
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
|
||||
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
|
||||
fprintf(outfile, " (JIT)");
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
|
|
|
@ -245,6 +245,7 @@ Subject length lower bound = 4
|
|||
3: c
|
||||
abcb\=ovector=0
|
||||
Matched, but too many substrings
|
||||
0: abcb
|
||||
abcb\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: abcb
|
||||
|
@ -273,6 +274,7 @@ Subject length lower bound = 3
|
|||
1: a
|
||||
abc\=ovector=0
|
||||
Matched, but too many substrings
|
||||
0: abc
|
||||
abc\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: abc
|
||||
|
@ -286,6 +288,7 @@ Matched, but too many substrings
|
|||
3: b
|
||||
aba\=ovector=0
|
||||
Matched, but too many substrings
|
||||
0: aba
|
||||
aba\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: aba
|
||||
|
@ -7404,6 +7407,7 @@ Subject length lower bound = 3
|
|||
No match
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4\=ovector=0
|
||||
Matched, but too many substrings
|
||||
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
|
||||
|
||||
/^a.b/newline=lf
|
||||
a\rb
|
||||
|
@ -10922,6 +10926,7 @@ Minimum recursion limit = 4
|
|||
3: baz
|
||||
bazfooX\=ovector=0
|
||||
Matched, but too many substrings
|
||||
0: fooX
|
||||
bazfooX\=ovector=1
|
||||
Matched, but too many substrings
|
||||
0: fooX
|
||||
|
@ -11970,7 +11975,7 @@ Callout 2: last capture = 0
|
|||
|
||||
/(ab)x|ab/
|
||||
ab\=ovector=0
|
||||
Matched, but too many substrings
|
||||
0: ab
|
||||
ab\=ovector=1
|
||||
0: ab
|
||||
|
||||
|
|
|
@ -7611,7 +7611,7 @@ Failed: error -37: invalid data in workspace for DFA restart
|
|||
|
||||
/abcd/
|
||||
abcd\=ovector=0
|
||||
Matched, but offsets vector is too small to show all matches
|
||||
0: abcd
|
||||
|
||||
# These tests show up auto-possessification
|
||||
|
||||
|
|
Loading…
Reference in New Issue