Make \=find_limits apply to DFA matching, to find the minimum depth limit.
This commit is contained in:
parent
3aeb812180
commit
5c1984d94a
|
@ -84,6 +84,10 @@ pcre2test.
|
||||||
14. The alternative matching function, pcre2_dfa_match() misbehaved if it
|
14. The alternative matching function, pcre2_dfa_match() misbehaved if it
|
||||||
encountered a character class with a possessive repeat, for example [a-f]{3}+.
|
encountered a character class with a possessive repeat, for example [a-f]{3}+.
|
||||||
|
|
||||||
|
15. The depth (formerly recursion) limit now applies to DFA matching (as
|
||||||
|
of 10.23/36); pcre2test has been upgraded so that \=find_limits works with DFA
|
||||||
|
matching to find the minimum value for this limit.
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 14-February-2017
|
Version 10.23 14-February-2017
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "21 March 2017" "PCRE 10.30"
|
.TH PCRE2TEST 1 "24 March 2017" "PCRE 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -1052,7 +1052,7 @@ pattern.
|
||||||
copy=<number or name> copy captured substring
|
copy=<number or name> copy captured substring
|
||||||
depth_limit=<n> set a depth limit
|
depth_limit=<n> set a depth limit
|
||||||
dfa use \fBpcre2_dfa_match()\fP
|
dfa use \fBpcre2_dfa_match()\fP
|
||||||
find_limits find match and recursion limits
|
find_limits find match and depth limits
|
||||||
get=<number or name> extract captured substring
|
get=<number or name> extract captured substring
|
||||||
getall extract all captured substrings
|
getall extract all captured substrings
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
@ -1297,23 +1297,26 @@ limits in the match context. These values are ignored when the
|
||||||
.SS "Finding minimum limits"
|
.SS "Finding minimum limits"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
If the \fBfind_limits\fP modifier is present, \fBpcre2test\fP calls
|
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
|
||||||
\fBpcre2_match()\fP several times, setting different values in the match
|
calls the relevant matching function several times, setting different values in
|
||||||
context via \fBpcre2_set_match_limit()\fP and \fBpcre2_set_depth_limit()\fP
|
the match context via \fBpcre2_set_match_limit()\fP or
|
||||||
until it finds the minimum values for each parameter that allow
|
\fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
|
||||||
\fBpcre2_match()\fP to complete without error.
|
parameter that allow the match to complete without error.
|
||||||
.P
|
.P
|
||||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||||
being used, only the depth limit is relevant, but at present this modifier is
|
being used, only the depth limit is relevant.
|
||||||
ignored (with a warning message).
|
|
||||||
.P
|
.P
|
||||||
The \fImatch_limit\fP number is a measure of the amount of backtracking
|
The \fImatch_limit\fP number is a measure of the amount of backtracking
|
||||||
that takes place, and learning the minimum value can be instructive. For most
|
that takes place, and learning the minimum value can be instructive. For most
|
||||||
simple matches, the number is quite small, but for patterns with very large
|
simple matches, the number is quite small, but for patterns with very large
|
||||||
numbers of matching possibilities, it can become large very quickly with
|
numbers of matching possibilities, it can become large very quickly with
|
||||||
increasing length of subject string. The \fIdepth_limit\fP number is
|
increasing length of subject string.
|
||||||
a measure of how much memory for recording backtracking points is needed to
|
.P
|
||||||
complete the match attempt.
|
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
|
||||||
|
much memory for recording backtracking points is needed to complete the match
|
||||||
|
attempt. In the case of DFA matching, \fIdepth_limit\fP controls the depth of
|
||||||
|
recursive calls of the internal function that is used for handling pattern
|
||||||
|
recursion, lookaround assertions, and atomic groups.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Showing MARK names"
|
.SS "Showing MARK names"
|
||||||
|
@ -1765,6 +1768,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 21 March 2017
|
Last updated: 24 March 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -5258,8 +5258,20 @@ return PR_OK;
|
||||||
* Check match or depth limit *
|
* Check match or depth limit *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
|
/* This is used for DFA, normal, and JIT fast matching. For DFA matching it
|
||||||
|
should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
pp the subject string
|
||||||
|
ulen length of subject or PCRE2_ZERO_TERMINATED
|
||||||
|
errnumber defines which limit to test
|
||||||
|
msg string to include in final message
|
||||||
|
|
||||||
|
Returns: the return from the final match function call
|
||||||
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg)
|
check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
|
||||||
{
|
{
|
||||||
int capcount;
|
int capcount;
|
||||||
uint32_t min = 0;
|
uint32_t min = 0;
|
||||||
|
@ -5279,10 +5291,22 @@ for (;;)
|
||||||
{
|
{
|
||||||
PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
|
PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((pat_patctl.control & CTL_JITFAST) != 0)
|
if ((dat_datctl.control & CTL_DFA) != 0)
|
||||||
|
{
|
||||||
|
if (dfa_workspace == NULL)
|
||||||
|
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
|
||||||
|
if (dfa_matched++ == 0)
|
||||||
|
dfa_workspace[0] = -1; /* To catch bad restart */
|
||||||
|
PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
|
dat_datctl.options, match_data,
|
||||||
|
PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
|
||||||
|
}
|
||||||
|
|
||||||
|
else if ((pat_patctl.control & CTL_JITFAST) != 0)
|
||||||
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
dat_datctl.options, match_data, PTR(dat_context));
|
dat_datctl.options, match_data, PTR(dat_context));
|
||||||
|
|
||||||
else
|
else
|
||||||
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
dat_datctl.options, match_data, PTR(dat_context));
|
dat_datctl.options, match_data, PTR(dat_context));
|
||||||
|
@ -6243,12 +6267,6 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
/* Handle matching via the native interface. Check for consistency of
|
/* Handle matching via the native interface. Check for consistency of
|
||||||
modifiers. */
|
modifiers. */
|
||||||
|
|
||||||
if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
|
|
||||||
{
|
|
||||||
fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n");
|
|
||||||
dat_datctl.control &= ~CTL_FINDLIMITS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
|
/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
|
||||||
matching, even if the JIT compiler was used. */
|
matching, even if the JIT compiler was used. */
|
||||||
|
|
||||||
|
@ -6579,14 +6597,19 @@ else for (gmatched = 0;; gmatched++)
|
||||||
(double)CLOCKS_PER_SEC);
|
(double)CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the match and depth limits if requested. The depth limit
|
/* Find the match and depth limits if requested. The match limit is not
|
||||||
is not relevant for JIT. */
|
relevant for DFA matching and the depth limit is not relevant for JIT. */
|
||||||
|
|
||||||
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
|
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
|
||||||
{
|
{
|
||||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, "match");
|
if ((dat_datctl.control & CTL_DFA) == 0)
|
||||||
if (FLD(compiled_code, executable_jit) == NULL)
|
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
|
||||||
(void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
|
"match");
|
||||||
|
else capcount = 0;
|
||||||
|
if (FLD(compiled_code, executable_jit) == NULL ||
|
||||||
|
(dat_datctl.options & PCRE2_NO_JIT) != 0 ||
|
||||||
|
(dat_datctl.control & CTL_DFA) != 0)
|
||||||
|
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
|
||||||
"depth");
|
"depth");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4889,4 +4889,7 @@
|
||||||
/(02-)?[0-9]{3}-[0-9]{3}/
|
/(02-)?[0-9]{3}-[0-9]{3}/
|
||||||
02-123-123
|
02-123-123
|
||||||
|
|
||||||
|
/^(a(?2))(b)(?1)/
|
||||||
|
abbab\=find_limits
|
||||||
|
|
||||||
# End of testinput6
|
# End of testinput6
|
||||||
|
|
|
@ -7689,4 +7689,9 @@ Failed: error -53: matching depth limit exceeded
|
||||||
02-123-123
|
02-123-123
|
||||||
0: 02-123-123
|
0: 02-123-123
|
||||||
|
|
||||||
|
/^(a(?2))(b)(?1)/
|
||||||
|
abbab\=find_limits
|
||||||
|
Minimum depth limit = 2
|
||||||
|
0: abbab
|
||||||
|
|
||||||
# End of testinput6
|
# End of testinput6
|
||||||
|
|
Loading…
Reference in New Issue