diff --git a/ChangeLog b/ChangeLog index 67e89e7..63a115d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -84,6 +84,10 @@ pcre2test. 14. The alternative matching function, pcre2_dfa_match() misbehaved if it encountered a character class with a possessive repeat, for example [a-f]{3}+. +15. The depth (formerly recursion) limit now applies to DFA matching (as +of 10.23/36); pcre2test has been upgraded so that \=find_limits works with DFA +matching to find the minimum value for this limit. + Version 10.23 14-February-2017 ------------------------------ diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 3238c50..6170bd3 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "21 March 2017" "PCRE 10.30" +.TH PCRE2TEST 1 "24 March 2017" "PCRE 10.30" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -1052,7 +1052,7 @@ pattern. copy= copy captured substring depth_limit= set a depth limit dfa use \fBpcre2_dfa_match()\fP - find_limits find match and recursion limits + find_limits find match and depth limits get= extract captured substring getall extract all captured substrings /g global global matching @@ -1297,23 +1297,26 @@ limits in the match context. These values are ignored when the .SS "Finding minimum limits" .rs .sp -If the \fBfind_limits\fP modifier is present, \fBpcre2test\fP calls -\fBpcre2_match()\fP several times, setting different values in the match -context via \fBpcre2_set_match_limit()\fP and \fBpcre2_set_depth_limit()\fP -until it finds the minimum values for each parameter that allow -\fBpcre2_match()\fP to complete without error. +If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP +calls the relevant matching function several times, setting different values in +the match context via \fBpcre2_set_match_limit()\fP or +\fBpcre2_set_depth_limit()\fP until it finds the minimum values for each +parameter that allow the match to complete without error. 
.P If JIT is being used, only the match limit is relevant. If DFA matching is -being used, only the depth limit is relevant, but at present this modifier is -ignored (with a warning message). +being used, only the depth limit is relevant. .P The \fImatch_limit\fP number is a measure of the amount of backtracking that takes place, and learning the minimum value can be instructive. For most simple matches, the number is quite small, but for patterns with very large numbers of matching possibilities, it can become large very quickly with -increasing length of subject string. The \fIdepth_limit\fP number is -a measure of how much memory for recording backtracking points is needed to -complete the match attempt. +increasing length of subject string. +.P +For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how +much memory for recording backtracking points is needed to complete the match +attempt. In the case of DFA matching, \fIdepth_limit\fP controls the depth of +recursive calls of the internal function that is used for handling pattern +recursion, lookaround assertions, and atomic groups. . . .SS "Showing MARK names" @@ -1765,6 +1768,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 21 March 2017 +Last updated: 24 March 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/src/pcre2test.c b/src/pcre2test.c index b979dd1..cebd323 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -5258,8 +5258,20 @@ return PR_OK; * Check match or depth limit * *************************************************/ +/* This is used for DFA, normal, and JIT fast matching. For DFA matching it +should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT. 
+ +Arguments: + pp the subject string + ulen length of subject or PCRE2_ZERO_TERMINATED + errnumber defines which limit to test + msg string to include in final message + +Returns: the return from the final match function call +*/ + static int -check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg) +check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg) { int capcount; uint32_t min = 0; @@ -5279,10 +5291,22 @@ for (;;) { PCRE2_SET_DEPTH_LIMIT(dat_context, mid); } - - if ((pat_patctl.control & CTL_JITFAST) != 0) + + if ((dat_datctl.control & CTL_DFA) != 0) + { + if (dfa_workspace == NULL) + dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); + if (dfa_matched++ == 0) + dfa_workspace[0] = -1; /* To catch bad restart */ + PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, + dat_datctl.options, match_data, + PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION); + } + + else if ((pat_patctl.control & CTL_JITFAST) != 0) PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options, match_data, PTR(dat_context)); + else PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, dat_datctl.options, match_data, PTR(dat_context)); @@ -6243,12 +6267,6 @@ if ((pat_patctl.control & CTL_POSIX) != 0) /* Handle matching via the native interface. Check for consistency of modifiers. */ -if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS)) - { - fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n"); - dat_datctl.control &= ~CTL_FINDLIMITS; - } - /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA matching, even if the JIT compiler was used. */ @@ -6579,14 +6597,19 @@ else for (gmatched = 0;; gmatched++) (double)CLOCKS_PER_SEC); } - /* Find the match and depth limits if requested. The depth limit - is not relevant for JIT. */ + /* Find the match and depth limits if requested. 
The match limit is not + relevant for DFA matching and the depth limit is not relevant for JIT. */ if ((dat_datctl.control & CTL_FINDLIMITS) != 0) { - capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, "match"); - if (FLD(compiled_code, executable_jit) == NULL) - (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT, + if ((dat_datctl.control & CTL_DFA) == 0) + capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, + "match"); + else capcount = 0; + if (FLD(compiled_code, executable_jit) == NULL || + (dat_datctl.options & PCRE2_NO_JIT) != 0 || + (dat_datctl.control & CTL_DFA) != 0) + capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT, "depth"); } diff --git a/testdata/testinput6 b/testdata/testinput6 index e928f4d..2856551 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4889,4 +4889,7 @@ /(02-)?[0-9]{3}-[0-9]{3}/ 02-123-123 +/^(a(?2))(b)(?1)/ + abbab\=find_limits + # End of testinput6 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 14a5b43..c112d4e 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7689,4 +7689,9 @@ Failed: error -53: matching depth limit exceeded 02-123-123 0: 02-123-123 +/^(a(?2))(b)(?1)/ + abbab\=find_limits +Minimum depth limit = 2 + 0: abbab + # End of testinput6