Implement PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). No documentation

yet.
This commit is contained in:
Philip.Hazel 2015-09-22 11:24:28 +00:00
parent be7366f6f0
commit 69d61e2fdb
13 changed files with 206 additions and 43 deletions

View File

@ -187,6 +187,8 @@ the SSE2 instruction set.
52. It is now possible to have comment lines amid the subject strings in
pcre2test (and perltest.sh) input.
53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit().
Version 10.20 30-June-2015
--------------------------

View File

@ -121,6 +121,7 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
/* These are for pcre2_jit_compile(). */
@ -234,6 +235,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
#define PCRE2_ERROR_UNAVAILABLE (-54)
#define PCRE2_ERROR_UNSET (-55)
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
/* Request types for pcre2_pattern_info() */
@ -406,6 +408,8 @@ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
PCRE2_SIZE); \
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
@ -609,6 +613,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)

View File

@ -566,7 +566,8 @@ static PCRE2_SPTR posix_substitutes[] = {
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF)
PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
PCRE2_UTF)
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and

View File

@ -169,6 +169,7 @@ const pcre2_match_context PRIV(default_match_context) = {
#endif
NULL,
NULL,
PCRE2_UNSET, /* Offset limit */
MATCH_LIMIT,
MATCH_LIMIT_RECURSION };
@ -347,6 +348,13 @@ mcontext->match_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
{
mcontext->offset_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
{

View File

@ -3116,6 +3116,7 @@ const pcre2_real_code *re = (const pcre2_real_code *)code;
PCRE2_SPTR start_match;
PCRE2_SPTR end_subject;
PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR req_cu_ptr;
BOOL utf, anchored, startline, firstline;
@ -3176,11 +3177,6 @@ options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF
#undef OO
/* A NULL match context means "use a default context" */
if (mcontext == NULL)
mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
/* If restarting after a partial match, do some sanity checks on the contents
of the workspace. */
@ -3205,8 +3201,11 @@ where to start. */
startline = (re->flags & PCRE2_STARTLINE) != 0;
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
bumpalong_limit = end_subject;
/* Fill in the fields in the match block. */
/* Get data from the match context, if present, and fill in the fields in the
match block. It is an error to set an offset limit without setting the flag at
compile time. */
if (mcontext == NULL)
{
@ -3215,6 +3214,12 @@ if (mcontext == NULL)
}
else
{
if (mcontext->offset_limit != PCRE2_UNSET)
{
if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
return PCRE2_ERROR_BADOFFSETLIMIT;
bumpalong_limit = subject + mcontext->offset_limit;
}
mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data;
mb->memctl = mcontext->memctl;
@ -3539,6 +3544,10 @@ for (;;)
/* ------------ End of start of match optimizations ------------ */
/* Give no match if we have passed the bumpalong limit. */
if (start_match > bumpalong_limit) break;
/* OK, now we can do the business */
mb->start_used_ptr = start_match;

View File

@ -238,7 +238,9 @@ static const char match_error_texts[] =
"nested recursion at the same subject position\0"
"recursion limit exceeded\0"
"requested value is not available\0"
/* 55 */
"requested value is not set\0"
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
;

View File

@ -580,6 +580,7 @@ typedef struct pcre2_real_match_context {
#endif
int (*callout)(pcre2_callout_block *, void *);
void *callout_data;
PCRE2_SIZE offset_limit;
uint32_t match_limit;
uint32_t recursion_limit;
} pcre2_real_match_context;

View File

@ -6462,6 +6462,7 @@ PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0;
PCRE2_UCHAR req_cu2 = 0;
PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR end_subject;
PCRE2_SPTR start_match = subject + start_offset;
PCRE2_SPTR req_cu_ptr = start_match - 1;
@ -6584,6 +6585,13 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
}
#endif /* SUPPORT_UNICODE */
/* It is an error to set an offset limit without setting the flag at compile
time. */
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
(re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
return PCRE2_ERROR_BADOFFSETLIMIT;
/* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fallback to the normal code path if
@ -6591,6 +6599,13 @@ an unsupported option is set or if JIT returns BADOPTION (which means that the
selected normal or partial matching mode was not compiled). */
#ifdef SUPPORT_JIT
/* +++ TEMPORARY: JIT does not yet support offset_limit. */
if (mcontext == NULL || mcontext->offset_limit == PCRE2_UNSET)
/* +++ */
if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
{
rc = pcre2_jit_match(code, subject, length, start_offset, options,
@ -6604,8 +6619,10 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
startline = (re->flags & PCRE2_STARTLINE) != 0;
bumpalong_limit = end_subject;
/* Fill in the fields in the match block. */
/* Get data from the match context, if it exists, and fill in the fields in the
match block. */
if (mcontext == NULL)
{
@ -6617,6 +6634,8 @@ if (mcontext == NULL)
}
else
{
if (mcontext->offset_limit != PCRE2_UNSET)
bumpalong_limit = subject + mcontext->offset_limit;
mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data;
mb->memctl = mcontext->memctl;
@ -6970,6 +6989,14 @@ for(;;)
/* ------------ End of start of match optimizations ------------ */
/* Give no match if we have passed the bumpalong limit. */
if (start_match > bumpalong_limit)
{
rc = MATCH_NOMATCH;
break;
}
/* OK, we can now run the match. If "hitend" is set afterwards, remember the
first starting point for which a partial match was found. */
@ -7088,7 +7115,7 @@ for(;;)
(2) The pattern is anchored or the match was failed by (*COMMIT);
(3) We are past the end of the subject;
(3) We are past the end of the subject or the bumpalong limit;
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
this option requests that a match occur at or before the first newline in

View File

@ -379,6 +379,7 @@ enum { MOD_CTC, /* Applies to a compile context */
MOD_NL, /* Is a newline value */
MOD_NN, /* Is a number or a name; more than one may occur */
MOD_OPT, /* Is an option bit */
MOD_SIZ, /* Is a PCRE2_SIZE value */
MOD_STR }; /* Is a string */
/* Control bits. Some apply to compiling, some to matching, but some can be set
@ -550,6 +551,7 @@ static modstruct modlist[] = {
{ "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
{ "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
{ "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
{ "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
{ "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
{ "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
{ "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
@ -565,6 +567,7 @@ static modstruct modlist[] = {
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
{ "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
{ "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
};
@ -1067,6 +1070,14 @@ are supported. */
else \
pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_offset_limit_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
pcre2_set_offset_limit_16(G(a,16),b); \
else \
pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_parens_nest_limit_8(G(a,8),b); \
@ -1467,6 +1478,12 @@ the three different cases. */
else \
G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
else \
G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
@ -1665,6 +1682,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
@ -1756,6 +1774,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
@ -1847,6 +1866,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
@ -3235,6 +3255,12 @@ for (;;)
}
/* Fall through */
case MOD_SIZ: /* PCRE2_SIZE value */
if (!isdigit(*pp)) goto INVALID_VALUE;
*((PCRE2_SIZE *)field) = (PCRE2_SIZE)strtoul((const char *)pp, &endptr, 10);
pp = (uint8_t *)endptr;
break;
case MOD_INT: /* Unsigned integer */
if (!isdigit(*pp)) goto INVALID_VALUE;
*((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10);
@ -3473,7 +3499,7 @@ static void
show_compile_options(uint32_t options, const char *before, const char *after)
{
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
@ -3499,6 +3525,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
((options & PCRE2_UCP) != 0)? " ucp" : "",
((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
((options & PCRE2_UTF) != 0)? " utf" : "",
after);
}

18
testdata/testinput2 vendored
View File

@ -4515,4 +4515,22 @@ B)x/alt_verbnames,mark
/(*:abc\Qpqr)/alt_verbnames
/abc/use_offset_limit
1234abcde\=offset_limit=100
1234abcde\=offset_limit=9
1234abcde\=offset_limit=4
1234abcde\=offset_limit=4,offset=4
\= Expect no match
1234abcde\=offset_limit=4,offset=5
1234abcde\=offset_limit=3
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
\= Expect no match
1234abc\=offset_limit=6
/abc/
\= Expect error
1234abcde\=offset_limit=4
# End of testinput2

14
testdata/testinput6 vendored
View File

@ -4868,4 +4868,18 @@
/^/gm,alt_circumflex
\n\n\n
/abc/use_offset_limit
1234abcde\=offset_limit=100
1234abcde\=offset_limit=9
1234abcde\=offset_limit=4
1234abcde\=offset_limit=4,offset=4
\= Expect no match
1234abcde\=offset_limit=4,offset=5
1234abcde\=offset_limit=3
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
\= Expect no match
1234abc\=offset_limit=6
# End of testinput6

27
testdata/testoutput2 vendored
View File

@ -14633,4 +14633,31 @@ MK: A\x0aB
/(*:abc\Qpqr)/alt_verbnames
Failed: error 160 at offset 12: (*VERB) not recognized or malformed
/abc/use_offset_limit
1234abcde\=offset_limit=100
0: abc
1234abcde\=offset_limit=9
0: abc
1234abcde\=offset_limit=4
0: abc
1234abcde\=offset_limit=4,offset=4
0: abc
\= Expect no match
1234abcde\=offset_limit=4,offset=5
No match
1234abcde\=offset_limit=3
No match
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
0:
\= Expect no match
1234abc\=offset_limit=6
No match
/abc/
\= Expect error
1234abcde\=offset_limit=4
Failed: error -56: offset limit set without PCRE2_USE_OFFSET_LIMIT
# End of testinput2

22
testdata/testoutput6 vendored
View File

@ -7655,4 +7655,26 @@ No match
0:
0:
/abc/use_offset_limit
1234abcde\=offset_limit=100
0: abc
1234abcde\=offset_limit=9
0: abc
1234abcde\=offset_limit=4
0: abc
1234abcde\=offset_limit=4,offset=4
0: abc
\= Expect no match
1234abcde\=offset_limit=4,offset=5
No match
1234abcde\=offset_limit=3
No match
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
0:
\= Expect no match
1234abc\=offset_limit=6
No match
# End of testinput6