Implement PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). No documentation yet.
This commit is contained in:
Philip.Hazel 2015-09-22 11:24:28 +00:00
parent be7366f6f0
commit 69d61e2fdb
13 changed files with 206 additions and 43 deletions

View File

@ -187,6 +187,8 @@ the SSE2 instruction set.
52. It is now possible to have comment lines amid the subject strings in 52. It is now possible to have comment lines amid the subject strings in
pcre2test (and perltest.sh) input. pcre2test (and perltest.sh) input.
53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit().
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -121,6 +121,7 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ #define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ #define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ #define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
/* These are for pcre2_jit_compile(). */ /* These are for pcre2_jit_compile(). */
@ -234,6 +235,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_RECURSIONLIMIT (-53) #define PCRE2_ERROR_RECURSIONLIMIT (-53)
#define PCRE2_ERROR_UNAVAILABLE (-54) #define PCRE2_ERROR_UNAVAILABLE (-54)
#define PCRE2_ERROR_UNSET (-55) #define PCRE2_ERROR_UNSET (-55)
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */
@ -406,6 +408,8 @@ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *), void *); \ int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \ uint32_t); \
PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
PCRE2_SIZE); \
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
uint32_t); \ uint32_t); \
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
@ -609,6 +613,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)

View File

@ -566,7 +566,8 @@ static PCRE2_SPTR posix_substitutes[] = {
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \ PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF) PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
PCRE2_UTF)
/* Compile time error code numbers. They are given names so that they can more /* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and easily be tracked. When a new number is added, the tables called eint1 and
@ -2797,8 +2798,8 @@ return n8;
*************************************************/ *************************************************/
/* This function is called when the PCRE2_ALT_VERBNAMES option is set, to /* This function is called when the PCRE2_ALT_VERBNAMES option is set, to
process the characters in a verb's name argument. It is called twice, once with process the characters in a verb's name argument. It is called twice, once with
codeptr == NULL, to find out the length of the processed name, and again to put codeptr == NULL, to find out the length of the processed name, and again to put
the name into memory. the name into memory.
Arguments: Arguments:
@ -2837,12 +2838,12 @@ for (; ptr < cb->end_pattern; ptr++)
} }
else /* Not a literal character */ else /* Not a literal character */
{ {
if (x == CHAR_RIGHT_PARENTHESIS) break; if (x == CHAR_RIGHT_PARENTHESIS) break;
/* Skip over comments and whitespace in extended mode. Need a loop to handle /* Skip over comments and whitespace in extended mode. Need a loop to handle
whitespace after a comment. */ whitespace after a comment. */
if ((options & PCRE2_EXTENDED) != 0) if ((options & PCRE2_EXTENDED) != 0)
{ {
for (;;) for (;;)
@ -2864,21 +2865,21 @@ for (; ptr < cb->end_pattern; ptr++)
} }
x = *ptr; /* Either NULL or the char after a newline */ x = *ptr; /* Either NULL or the char after a newline */
} }
if (ptr >= cb->end_pattern) break; if (ptr >= cb->end_pattern) break;
} }
/* Process escapes */ /* Process escapes */
if (x == '\\') if (x == '\\')
{ {
int rc; int rc;
*errorcodeptr = 0; *errorcodeptr = 0;
rc = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb); rc = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
*ptrptr = ptr; /* For possible error */ *ptrptr = ptr; /* For possible error */
if (*errorcodeptr != 0) return -1; if (*errorcodeptr != 0) return -1;
if (rc != 0) if (rc != 0)
{ {
if (rc == ESC_Q) if (rc == ESC_Q)
{ {
inescq = TRUE; inescq = TRUE;
continue; continue;
@ -2888,8 +2889,8 @@ for (; ptr < cb->end_pattern; ptr++)
return -1; return -1;
} }
} }
} }
/* We have the next character in the name. */ /* We have the next character in the name. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
@ -5541,7 +5542,7 @@ for (;; ptr++)
} }
else else
{ {
arglen = process_verb_name(&ptr, NULL, errorcodeptr, options, arglen = process_verb_name(&ptr, NULL, errorcodeptr, options,
utf, cb); utf, cb);
if (arglen < 0) goto FAILED; if (arglen < 0) goto FAILED;
} }
@ -5616,9 +5617,9 @@ for (;; ptr++)
if ((options & PCRE2_ALT_VERBNAMES) != 0) if ((options & PCRE2_ALT_VERBNAMES) != 0)
{ {
PCRE2_UCHAR *memcode = code; /* code is "register" */ PCRE2_UCHAR *memcode = code; /* code is "register" */
(void)process_verb_name(&arg, &memcode, errorcodeptr, options, (void)process_verb_name(&arg, &memcode, errorcodeptr, options,
utf, cb); utf, cb);
code = memcode; code = memcode;
} }
else /* No argument processing */ else /* No argument processing */
{ {

View File

@ -169,6 +169,7 @@ const pcre2_match_context PRIV(default_match_context) = {
#endif #endif
NULL, NULL,
NULL, NULL,
PCRE2_UNSET, /* Offset limit */
MATCH_LIMIT, MATCH_LIMIT,
MATCH_LIMIT_RECURSION }; MATCH_LIMIT_RECURSION };
@ -347,6 +348,13 @@ mcontext->match_limit = limit;
return 0; return 0;
} }
/* Record an offset limit in a match context. The value is stored unchecked in
mcontext->offset_limit. The matchers turn it into a "bumpalong" limit of
subject + offset_limit and give no match once the starting position has moved
past it. A context whose limit is still PCRE2_UNSET imposes no limit. Matching
with a limit set returns PCRE2_ERROR_BADOFFSETLIMIT unless the pattern was
compiled with PCRE2_USE_OFFSET_LIMIT.

Arguments:
  mcontext   the match context to update; dereferenced without a NULL check
  limit      the offset limit, or PCRE2_UNSET for no limit

Returns:     0 always
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
{
mcontext->offset_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit) pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
{ {

View File

@ -3116,6 +3116,7 @@ const pcre2_real_code *re = (const pcre2_real_code *)code;
PCRE2_SPTR start_match; PCRE2_SPTR start_match;
PCRE2_SPTR end_subject; PCRE2_SPTR end_subject;
PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR req_cu_ptr; PCRE2_SPTR req_cu_ptr;
BOOL utf, anchored, startline, firstline; BOOL utf, anchored, startline, firstline;
@ -3176,11 +3177,6 @@ options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF #undef FF
#undef OO #undef OO
/* A NULL match context means "use a default context" */
if (mcontext == NULL)
mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
/* If restarting after a partial match, do some sanity checks on the contents /* If restarting after a partial match, do some sanity checks on the contents
of the workspace. */ of the workspace. */
@ -3205,8 +3201,11 @@ where to start. */
startline = (re->flags & PCRE2_STARTLINE) != 0; startline = (re->flags & PCRE2_STARTLINE) != 0;
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
bumpalong_limit = end_subject;
/* Fill in the fields in the match block. */ /* Get data from the match context, if present, and fill in the fields in the
match block. It is an error to set an offset limit without setting the flag at
compile time. */
if (mcontext == NULL) if (mcontext == NULL)
{ {
@ -3215,6 +3214,12 @@ if (mcontext == NULL)
} }
else else
{ {
if (mcontext->offset_limit != PCRE2_UNSET)
{
if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
return PCRE2_ERROR_BADOFFSETLIMIT;
bumpalong_limit = subject + mcontext->offset_limit;
}
mb->callout = mcontext->callout; mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data; mb->callout_data = mcontext->callout_data;
mb->memctl = mcontext->memctl; mb->memctl = mcontext->memctl;
@ -3539,6 +3544,10 @@ for (;;)
/* ------------ End of start of match optimizations ------------ */ /* ------------ End of start of match optimizations ------------ */
/* Give no match if we have passed the bumpalong limit. */
if (start_match > bumpalong_limit) break;
/* OK, now we can do the business */ /* OK, now we can do the business */
mb->start_used_ptr = start_match; mb->start_used_ptr = start_match;

View File

@ -238,7 +238,9 @@ static const char match_error_texts[] =
"nested recursion at the same subject position\0" "nested recursion at the same subject position\0"
"recursion limit exceeded\0" "recursion limit exceeded\0"
"requested value is not available\0" "requested value is not available\0"
/* 55 */
"requested value is not set\0" "requested value is not set\0"
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
; ;

View File

@ -580,6 +580,7 @@ typedef struct pcre2_real_match_context {
#endif #endif
int (*callout)(pcre2_callout_block *, void *); int (*callout)(pcre2_callout_block *, void *);
void *callout_data; void *callout_data;
PCRE2_SIZE offset_limit;
uint32_t match_limit; uint32_t match_limit;
uint32_t recursion_limit; uint32_t recursion_limit;
} pcre2_real_match_context; } pcre2_real_match_context;

View File

@ -6462,6 +6462,7 @@ PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0; PCRE2_UCHAR req_cu = 0;
PCRE2_UCHAR req_cu2 = 0; PCRE2_UCHAR req_cu2 = 0;
PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR end_subject; PCRE2_SPTR end_subject;
PCRE2_SPTR start_match = subject + start_offset; PCRE2_SPTR start_match = subject + start_offset;
PCRE2_SPTR req_cu_ptr = start_match - 1; PCRE2_SPTR req_cu_ptr = start_match - 1;
@ -6537,10 +6538,10 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings, /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
we must also check that a starting offset does not point into the middle of a we must also check that a starting offset does not point into the middle of a
multiunit character. We check only the portion of the subject that is going to multiunit character. We check only the portion of the subject that is going to
be inspected during matching - from the offset minus the maximum back reference be inspected during matching - from the offset minus the maximum back reference
to the given length. This saves time when a small part of a large subject is to the given length. This saves time when a small part of a large subject is
being matched by the use of a starting offset. Note that the maximum lookbehind being matched by the use of a starting offset. Note that the maximum lookbehind
is a number of characters, not code units. */ is a number of characters, not code units. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
@ -6549,9 +6550,9 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */ PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
if (start_offset > 0) if (start_offset > 0)
{ {
#if PCRE2_CODE_UNIT_WIDTH != 32 #if PCRE2_CODE_UNIT_WIDTH != 32
unsigned int i; unsigned int i;
if (start_match < end_subject && NOT_FIRSTCU(*start_match)) if (start_match < end_subject && NOT_FIRSTCU(*start_match))
return PCRE2_ERROR_BADUTFOFFSET; return PCRE2_ERROR_BADUTFOFFSET;
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--) for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
@ -6563,27 +6564,34 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
#else /* 16-bit */ #else /* 16-bit */
(*check_subject & 0xfc00) == 0xdc00) (*check_subject & 0xfc00) == 0xdc00)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
check_subject--; check_subject--;
} }
#else /* In the 32-bit library, one code unit equals one character. */ #else /* In the 32-bit library, one code unit equals one character. */
check_subject -= re->max_lookbehind; check_subject -= re->max_lookbehind;
if (check_subject < subject) check_subject = subject; if (check_subject < subject) check_subject = subject;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
} }
/* Validate the relevant portion of the subject. After an error, adjust the /* Validate the relevant portion of the subject. After an error, adjust the
offset to be an absolute offset in the whole string. */ offset to be an absolute offset in the whole string. */
match_data->rc = PRIV(valid_utf)(check_subject, match_data->rc = PRIV(valid_utf)(check_subject,
length - (check_subject - subject), &(match_data->startchar)); length - (check_subject - subject), &(match_data->startchar));
if (match_data->rc != 0) if (match_data->rc != 0)
{ {
match_data->startchar += check_subject - subject; match_data->startchar += check_subject - subject;
return match_data->rc; return match_data->rc;
} }
} }
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */
/* It is an error to set an offset limit without setting the flag at compile
time. */
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
(re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
return PCRE2_ERROR_BADOFFSETLIMIT;
/* If the pattern was successfully studied with JIT support, run the JIT /* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fallback to the normal code path if compile time for the JIT code to be usable. Fallback to the normal code path if
@ -6591,6 +6599,13 @@ an unsupported option is set or if JIT returns BADOPTION (which means that the
selected normal or partial matching mode was not compiled). */ selected normal or partial matching mode was not compiled). */
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
/* +++ TEMPORARY: JIT does not yet support offset_limit. */
if (mcontext == NULL || mcontext->offset_limit == PCRE2_UNSET)
/* +++ */
if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0) if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
{ {
rc = pcre2_jit_match(code, subject, length, start_offset, options, rc = pcre2_jit_match(code, subject, length, start_offset, options,
@ -6604,8 +6619,10 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0; anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
startline = (re->flags & PCRE2_STARTLINE) != 0; startline = (re->flags & PCRE2_STARTLINE) != 0;
bumpalong_limit = end_subject;
/* Fill in the fields in the match block. */ /* Get data from the match context, if it exists, and fill in the fields in the
match block. */
if (mcontext == NULL) if (mcontext == NULL)
{ {
@ -6617,6 +6634,8 @@ if (mcontext == NULL)
} }
else else
{ {
if (mcontext->offset_limit != PCRE2_UNSET)
bumpalong_limit = subject + mcontext->offset_limit;
mb->callout = mcontext->callout; mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data; mb->callout_data = mcontext->callout_data;
mb->memctl = mcontext->memctl; mb->memctl = mcontext->memctl;
@ -6970,6 +6989,14 @@ for(;;)
/* ------------ End of start of match optimizations ------------ */ /* ------------ End of start of match optimizations ------------ */
/* Give no match if we have passed the bumpalong limit. */
if (start_match > bumpalong_limit)
{
rc = MATCH_NOMATCH;
break;
}
/* OK, we can now run the match. If "hitend" is set afterwards, remember the /* OK, we can now run the match. If "hitend" is set afterwards, remember the
first starting point for which a partial match was found. */ first starting point for which a partial match was found. */
@ -7088,7 +7115,7 @@ for(;;)
(2) The pattern is anchored or the match was failed by (*COMMIT); (2) The pattern is anchored or the match was failed by (*COMMIT);
(3) We are past the end of the subject; (3) We are past the end of the subject or the bumpalong limit;
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
this option requests that a match occur at or before the first newline in this option requests that a match occur at or before the first newline in

View File

@ -379,6 +379,7 @@ enum { MOD_CTC, /* Applies to a compile context */
MOD_NL, /* Is a newline value */ MOD_NL, /* Is a newline value */
MOD_NN, /* Is a number or a name; more than one may occur */ MOD_NN, /* Is a number or a name; more than one may occur */
MOD_OPT, /* Is an option bit */ MOD_OPT, /* Is an option bit */
MOD_SIZ, /* Is a PCRE2_SIZE value */
MOD_STR }; /* Is a string */ MOD_STR }; /* Is a string */
/* Control bits. Some apply to compiling, some to matching, but some can be set /* Control bits. Some apply to compiling, some to matching, but some can be set
@ -550,6 +551,7 @@ static modstruct modlist[] = {
{ "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
{ "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
{ "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
{ "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
{ "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
{ "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
{ "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
@ -565,6 +567,7 @@ static modstruct modlist[] = {
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
{ "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }, { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
{ "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) } { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
}; };
@ -1067,6 +1070,14 @@ are supported. */
else \ else \
pcre2_set_match_limit_32(G(a,32),b) pcre2_set_match_limit_32(G(a,32),b)
/* Set the offset limit (b) in match context (a) by calling the 8-bit, 16-bit
or 32-bit pcre2_set_offset_limit function, chosen at run time from test_mode.
The expansion is an if/else chain with no trailing semicolon, so the caller
supplies the terminating semicolon. G(a,8) etc. presumably paste the width onto
the context name - confirm against the definition of G. */
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_offset_limit_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
pcre2_set_offset_limit_16(G(a,16),b); \
else \
pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \ if (test_mode == PCRE8_MODE) \
pcre2_set_parens_nest_limit_8(G(a,8),b); \ pcre2_set_parens_nest_limit_8(G(a,8),b); \
@ -1467,6 +1478,12 @@ the three different cases. */
else \ else \
G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b) G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)
/* Set the offset limit (b) in match context (a) when two library widths are
available: test_mode selects between the BITONE and BITTWO variants of
pcre2_set_offset_limit. The expansion is an if/else with no trailing semicolon,
so the caller supplies it. BITONE and BITTWO are presumably the two compiled-in
code unit widths - confirm where they are defined. */
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
else \
G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \ G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
@ -1665,6 +1682,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c) pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
/* Single-width form: forwards straight to the 8-bit pcre2_set_offset_limit. */
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
@ -1756,6 +1774,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c) pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
/* Single-width form: forwards straight to the 16-bit pcre2_set_offset_limit. */
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
@ -1847,6 +1866,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c) pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
/* Single-width form: forwards straight to the 32-bit pcre2_set_offset_limit. */
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
@ -3235,6 +3255,12 @@ for (;;)
} }
/* Fall through */ /* Fall through */
case MOD_SIZ: /* PCRE2_SIZE value */
/* NOTE(review): this case sits directly below a "Fall through" comment, so
whichever case precedes it now appears to fall into this PCRE2_SIZE-wide store
instead of the MOD_INT code that follows - confirm against the full switch. */
if (!isdigit(*pp)) goto INVALID_VALUE;
/* The number is parsed by strtoul(), which yields an unsigned long, and is
then cast to PCRE2_SIZE. No range check is made on the result here.
NOTE(review): if PCRE2_SIZE is wider than unsigned long on some platform,
large limits cannot be entered - confirm. */
*((PCRE2_SIZE *)field) = (PCRE2_SIZE)strtoul((const char *)pp, &endptr, 10);
/* Continue parsing from the first character after the digits. */
pp = (uint8_t *)endptr;
break;
case MOD_INT: /* Unsigned integer */ case MOD_INT: /* Unsigned integer */
if (!isdigit(*pp)) goto INVALID_VALUE; if (!isdigit(*pp)) goto INVALID_VALUE;
*((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10); *((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10);
@ -3431,7 +3457,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "", ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "", ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
((controls & CTL_BINCODE) != 0)? " bincode" : "", ((controls & CTL_BINCODE) != 0)? " bincode" : "",
((controls & CTL_BSR_SET) != 0)? " bsr" : "", ((controls & CTL_BSR_SET) != 0)? " bsr" : "",
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "", ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "", ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "", ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
@ -3446,7 +3472,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
((controls & CTL_MARK) != 0)? " mark" : "", ((controls & CTL_MARK) != 0)? " mark" : "",
((controls & CTL_MEMORY) != 0)? " memory" : "", ((controls & CTL_MEMORY) != 0)? " memory" : "",
((controls & CTL_NL_SET) != 0)? " newline" : "", ((controls & CTL_NL_SET) != 0)? " newline" : "",
((controls & CTL_POSIX) != 0)? " posix" : "", ((controls & CTL_POSIX) != 0)? " posix" : "",
((controls & CTL_PUSH) != 0)? " push" : "", ((controls & CTL_PUSH) != 0)? " push" : "",
((controls & CTL_STARTCHAR) != 0)? " startchar" : "", ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
@ -3473,7 +3499,7 @@ static void
show_compile_options(uint32_t options, const char *before, const char *after) show_compile_options(uint32_t options, const char *before, const char *after)
{ {
if (options == 0) fprintf(outfile, "%s <none>%s", before, after); if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before, before,
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
@ -3499,6 +3525,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
((options & PCRE2_UCP) != 0)? " ucp" : "", ((options & PCRE2_UCP) != 0)? " ucp" : "",
((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "", ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
((options & PCRE2_UTF) != 0)? " utf" : "", ((options & PCRE2_UTF) != 0)? " utf" : "",
after); after);
} }
@ -4401,7 +4428,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, msg); show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, msg);
msg = ""; msg = "";
} }
if (local_newline_default != 0) prmsg(&msg, "#newline_default"); if (local_newline_default != 0) prmsg(&msg, "#newline_default");
if (msg[0] == 0) fprintf(outfile, "\n"); if (msg[0] == 0) fprintf(outfile, "\n");
@ -6975,7 +7002,7 @@ while (notdone)
skipping = FALSE; skipping = FALSE;
setlocale(LC_CTYPE, "C"); setlocale(LC_CTYPE, "C");
} }
else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2]))) else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
rc = process_data(); rc = process_data();
} }

18
testdata/testinput2 vendored
View File

@ -4515,4 +4515,22 @@ B)x/alt_verbnames,mark
/(*:abc\Qpqr)/alt_verbnames /(*:abc\Qpqr)/alt_verbnames
/abc/use_offset_limit
1234abcde\=offset_limit=100
1234abcde\=offset_limit=9
1234abcde\=offset_limit=4
1234abcde\=offset_limit=4,offset=4
\= Expect no match
1234abcde\=offset_limit=4,offset=5
1234abcde\=offset_limit=3
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
\= Expect no match
1234abc\=offset_limit=6
/abc/
\= Expect error
1234abcde\=offset_limit=4
# End of testinput2 # End of testinput2

14
testdata/testinput6 vendored
View File

@ -4868,4 +4868,18 @@
/^/gm,alt_circumflex /^/gm,alt_circumflex
\n\n\n \n\n\n
/abc/use_offset_limit
1234abcde\=offset_limit=100
1234abcde\=offset_limit=9
1234abcde\=offset_limit=4
1234abcde\=offset_limit=4,offset=4
\= Expect no match
1234abcde\=offset_limit=4,offset=5
1234abcde\=offset_limit=3
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
\= Expect no match
1234abc\=offset_limit=6
# End of testinput6 # End of testinput6

27
testdata/testoutput2 vendored
View File

@ -14633,4 +14633,31 @@ MK: A\x0aB
/(*:abc\Qpqr)/alt_verbnames /(*:abc\Qpqr)/alt_verbnames
Failed: error 160 at offset 12: (*VERB) not recognized or malformed Failed: error 160 at offset 12: (*VERB) not recognized or malformed
/abc/use_offset_limit
1234abcde\=offset_limit=100
0: abc
1234abcde\=offset_limit=9
0: abc
1234abcde\=offset_limit=4
0: abc
1234abcde\=offset_limit=4,offset=4
0: abc
\= Expect no match
1234abcde\=offset_limit=4,offset=5
No match
1234abcde\=offset_limit=3
No match
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
0:
\= Expect no match
1234abc\=offset_limit=6
No match
/abc/
\= Expect error
1234abcde\=offset_limit=4
Failed: error -56: offset limit set without PCRE2_USE_OFFSET_LIMIT
# End of testinput2 # End of testinput2

22
testdata/testoutput6 vendored
View File

@ -7655,4 +7655,26 @@ No match
0: 0:
0: 0:
/abc/use_offset_limit
1234abcde\=offset_limit=100
0: abc
1234abcde\=offset_limit=9
0: abc
1234abcde\=offset_limit=4
0: abc
1234abcde\=offset_limit=4,offset=4
0: abc
\= Expect no match
1234abcde\=offset_limit=4,offset=5
No match
1234abcde\=offset_limit=3
No match
/(?<=abc)/use_offset_limit
1234abc\=offset_limit=7
0:
\= Expect no match
1234abc\=offset_limit=6
No match
# End of testinput6 # End of testinput6