From 69d61e2fdb80ecec2cec35a91eac71e93b3e25b8 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 22 Sep 2015 11:24:28 +0000 Subject: [PATCH] Implement PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). No documentation yet. --- ChangeLog | 2 ++ src/pcre2.h.in | 5 ++++ src/pcre2_compile.c | 33 +++++++++++------------ src/pcre2_context.c | 8 ++++++ src/pcre2_dfa_match.c | 21 ++++++++++----- src/pcre2_error.c | 2 ++ src/pcre2_intmodedep.h | 1 + src/pcre2_match.c | 59 ++++++++++++++++++++++++++++++------------ src/pcre2test.c | 37 ++++++++++++++++++++++---- testdata/testinput2 | 18 +++++++++++++ testdata/testinput6 | 14 ++++++++++ testdata/testoutput2 | 27 +++++++++++++++++++ testdata/testoutput6 | 22 ++++++++++++++++ 13 files changed, 206 insertions(+), 43 deletions(-) diff --git a/ChangeLog b/ChangeLog index 339e7a1..895b88c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -187,6 +187,8 @@ the SSE2 instruction set. 52. It is now possible to have comment lines amid the subject strings in pcre2test (and perltest.sh) input. +53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 9aae0d9..24e5f26 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -121,6 +121,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */ #define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */ #define PCRE2_ALT_VERBNAMES 0x00400000u /* C */ +#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */ /* These are for pcre2_jit_compile(). */ @@ -234,6 +235,7 @@ numbers must not be changed. */ #define PCRE2_ERROR_RECURSIONLIMIT (-53) #define PCRE2_ERROR_UNAVAILABLE (-54) #define PCRE2_ERROR_UNSET (-55) +#define PCRE2_ERROR_BADOFFSETLIMIT (-56) /* Request types for pcre2_pattern_info() */ @@ -406,6 +408,8 @@ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ int (*)(pcre2_callout_block *, void *), void *); \ PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ uint32_t); \ +PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \ + PCRE2_SIZE); \ PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \ uint32_t); \ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \ @@ -609,6 +613,7 @@ pcre2_compile are called by application code. */ #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_) +#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_) #define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_) #define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_) #define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_) diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 47ead0f..2c881e0 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -566,7 +566,8 @@ static PCRE2_SPTR posix_substitutes[] = { PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \ PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ - PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_UTF) + PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \ + PCRE2_UTF) /* Compile time error code numbers. They are given names so that they can more easily be tracked. When a new number is added, the tables called eint1 and @@ -2797,8 +2798,8 @@ return n8; *************************************************/ /* This function is called when the PCRE2_ALT_VERBNAMES option is set, to -process the characters in a verb's name argument. It is called twice, once with -codeptr == NULL, to find out the length of the processed name, and again to put +process the characters in a verb's name argument. It is called twice, once with +codeptr == NULL, to find out the length of the processed name, and again to put the name into memory. Arguments: @@ -2837,12 +2838,12 @@ for (; ptr < cb->end_pattern; ptr++) } else /* Not a literal character */ - { + { if (x == CHAR_RIGHT_PARENTHESIS) break; - + /* Skip over comments and whitespace in extended mode. Need a loop to handle whitespace after a comment. */ - + if ((options & PCRE2_EXTENDED) != 0) { for (;;) @@ -2864,21 +2865,21 @@ for (; ptr < cb->end_pattern; ptr++) } x = *ptr; /* Either NULL or the char after a newline */ } - if (ptr >= cb->end_pattern) break; + if (ptr >= cb->end_pattern) break; } - + /* Process escapes */ - + if (x == '\\') { int rc; *errorcodeptr = 0; rc = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb); - *ptrptr = ptr; /* For possible error */ + *ptrptr = ptr; /* For possible error */ if (*errorcodeptr != 0) return -1; if (rc != 0) { - if (rc == ESC_Q) + if (rc == ESC_Q) { inescq = TRUE; continue; @@ -2888,8 +2889,8 @@ for (; ptr < cb->end_pattern; ptr++) return -1; } } - } - + } + /* We have the next character in the name. */ #ifdef SUPPORT_UNICODE @@ -5541,7 +5542,7 @@ for (;; ptr++) } else { - arglen = process_verb_name(&ptr, NULL, errorcodeptr, options, + arglen = process_verb_name(&ptr, NULL, errorcodeptr, options, utf, cb); if (arglen < 0) goto FAILED; } @@ -5616,9 +5617,9 @@ for (;; ptr++) if ((options & PCRE2_ALT_VERBNAMES) != 0) { PCRE2_UCHAR *memcode = code; /* code is "register" */ - (void)process_verb_name(&arg, &memcode, errorcodeptr, options, + (void)process_verb_name(&arg, &memcode, errorcodeptr, options, utf, cb); - code = memcode; + code = memcode; } else /* No argument processing */ { diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 6146999..5398dfe 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -169,6 +169,7 @@ const pcre2_match_context PRIV(default_match_context) = { #endif NULL, NULL, + PCRE2_UNSET, /* Offset limit */ MATCH_LIMIT, MATCH_LIMIT_RECURSION }; @@ -347,6 +348,13 @@ mcontext->match_limit = limit; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit) +{ +mcontext->offset_limit = limit; +return 0; +} + PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit) { diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 3cfa454..be53f53 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -3116,6 +3116,7 @@ const pcre2_real_code *re = (const pcre2_real_code *)code; PCRE2_SPTR start_match; PCRE2_SPTR end_subject; +PCRE2_SPTR bumpalong_limit; PCRE2_SPTR req_cu_ptr; BOOL utf, anchored, startline, firstline; @@ -3176,11 +3177,6 @@ options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1))); #undef FF #undef OO -/* A NULL match context means "use a default context" */ - -if (mcontext == NULL) - mcontext = (pcre2_match_context *)(&PRIV(default_match_context)); - /* If restarting after a partial match, do some sanity checks on the contents of the workspace. */ @@ -3205,8 +3201,11 @@ where to start. */ startline = (re->flags & PCRE2_STARTLINE) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; +bumpalong_limit = end_subject; -/* Fill in the fields in the match block. */ +/* Get data from the match context, if present, and fill in the fields in the +match block. It is an error to set an offset limit without setting the flag at +compile time. */ if (mcontext == NULL) { @@ -3215,6 +3214,12 @@ if (mcontext == NULL) } else { + if (mcontext->offset_limit != PCRE2_UNSET) + { + if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) + return PCRE2_ERROR_BADOFFSETLIMIT; + bumpalong_limit = subject + mcontext->offset_limit; + } mb->callout = mcontext->callout; mb->callout_data = mcontext->callout_data; mb->memctl = mcontext->memctl; @@ -3539,6 +3544,10 @@ for (;;) /* ------------ End of start of match optimizations ------------ */ + /* Give no match if we have passed the bumpalong limit. */ + + if (start_match > bumpalong_limit) break; + /* OK, now we can do the business */ mb->start_used_ptr = start_match; diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 73ef317..9f1b480 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -238,7 +238,9 @@ static const char match_error_texts[] = "nested recursion at the same subject position\0" "recursion limit exceeded\0" "requested value is not available\0" + /* 55 */ "requested value is not set\0" + "offset limit set without PCRE2_USE_OFFSET_LIMIT\0" ; diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 85ceb06..1970cb2 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -580,6 +580,7 @@ typedef struct pcre2_real_match_context { #endif int (*callout)(pcre2_callout_block *, void *); void *callout_data; + PCRE2_SIZE offset_limit; uint32_t match_limit; uint32_t recursion_limit; } pcre2_real_match_context; diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 8875d6d..a5f77e7 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -6462,6 +6462,7 @@ PCRE2_UCHAR first_cu2 = 0; PCRE2_UCHAR req_cu = 0; PCRE2_UCHAR req_cu2 = 0; +PCRE2_SPTR bumpalong_limit; PCRE2_SPTR end_subject; PCRE2_SPTR start_match = subject + start_offset; PCRE2_SPTR req_cu_ptr = start_match - 1; @@ -6537,10 +6538,10 @@ mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 : /* Check a UTF string for validity if required. For 8-bit and 16-bit strings, we must also check that a starting offset does not point into the middle of a -multiunit character. We check only the portion of the subject that is going to -be inspected during matching - from the offset minus the maximum back reference -to the given length. This saves time when a small part of a large subject is -being matched by the use of a starting offset. Note that the maximum lookbehind +multiunit character. We check only the portion of the subject that is going to +be inspected during matching - from the offset minus the maximum back reference +to the given length. This saves time when a small part of a large subject is +being matched by the use of a starting offset. Note that the maximum lookbehind is a number of characters, not code units. */ #ifdef SUPPORT_UNICODE @@ -6549,9 +6550,9 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) PCRE2_SPTR check_subject = start_match; /* start_match includes offset */ if (start_offset > 0) - { + { #if PCRE2_CODE_UNIT_WIDTH != 32 - unsigned int i; + unsigned int i; if (start_match < end_subject && NOT_FIRSTCU(*start_match)) return PCRE2_ERROR_BADUTFOFFSET; for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--) @@ -6563,27 +6564,34 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) #else /* 16-bit */ (*check_subject & 0xfc00) == 0xdc00) #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - check_subject--; - } + check_subject--; + } #else /* In the 32-bit library, one code unit equals one character. */ check_subject -= re->max_lookbehind; - if (check_subject < subject) check_subject = subject; + if (check_subject < subject) check_subject = subject; #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ } - + /* Validate the relevant portion of the subject. After an error, adjust the offset to be an absolute offset in the whole string. */ - - match_data->rc = PRIV(valid_utf)(check_subject, + + match_data->rc = PRIV(valid_utf)(check_subject, length - (check_subject - subject), &(match_data->startchar)); - if (match_data->rc != 0) + if (match_data->rc != 0) { match_data->startchar += check_subject - subject; return match_data->rc; - } + } } #endif /* SUPPORT_UNICODE */ +/* It is an error to set an offset limit without setting the flag at compile +time. */ + +if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET && + (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) + return PCRE2_ERROR_BADOFFSETLIMIT; + /* If the pattern was successfully studied with JIT support, run the JIT executable instead of the rest of this function. Most options must be set at compile time for the JIT code to be usable. Fallback to the normal code path if @@ -6591,6 +6599,13 @@ an unsupported option is set or if JIT returns BADOPTION (which means that the selected normal or partial matching mode was not compiled). */ #ifdef SUPPORT_JIT + +/* +++ TEMPORARY: JIT does not yet support offset_limit. */ + +if (mcontext == NULL || mcontext->offset_limit == PCRE2_UNSET) + +/* +++ */ + if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0) { rc = pcre2_jit_match(code, subject, length, start_offset, options, @@ -6604,8 +6619,10 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0) anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; startline = (re->flags & PCRE2_STARTLINE) != 0; +bumpalong_limit = end_subject; -/* Fill in the fields in the match block. */ +/* Get data from the match context, if it exists, and fill in the fields in the +match block. */ if (mcontext == NULL) { @@ -6617,6 +6634,8 @@ if (mcontext == NULL) } else { + if (mcontext->offset_limit != PCRE2_UNSET) + bumpalong_limit = subject + mcontext->offset_limit; mb->callout = mcontext->callout; mb->callout_data = mcontext->callout_data; mb->memctl = mcontext->memctl; @@ -6970,6 +6989,14 @@ for(;;) /* ------------ End of start of match optimizations ------------ */ + /* Give no match if we have passed the bumpalong limit. */ + + if (start_match > bumpalong_limit) + { + rc = MATCH_NOMATCH; + break; + } + /* OK, we can now run the match. If "hitend" is set afterwards, remember the first starting point for which a partial match was found. */ @@ -7088,7 +7115,7 @@ for(;;) (2) The pattern is anchored or the match was failed by (*COMMIT); -(3) We are past the end of the subject; +(3) We are past the end of the subject or the bumpalong limit; (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because this option requests that a match occur at or before the first newline in diff --git a/src/pcre2test.c b/src/pcre2test.c index b12f310..5e7c994 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -379,6 +379,7 @@ enum { MOD_CTC, /* Applies to a compile context */ MOD_NL, /* Is a newline value */ MOD_NN, /* Is a number or a name; more than one may occur */ MOD_OPT, /* Is an option bit */ + MOD_SIZ, /* Is a PCRE2_SIZE value */ MOD_STR }; /* Is a string */ /* Control bits. Some apply to compiling, some to matching, but some can be set @@ -550,6 +551,7 @@ static modstruct modlist[] = { { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, + { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, @@ -565,6 +567,7 @@ static modstruct modlist[] = { { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, + { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) }, { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }, { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) } }; @@ -1067,6 +1070,14 @@ are supported. */ else \ pcre2_set_match_limit_32(G(a,32),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_offset_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_offset_limit_16(G(a,16),b); \ + else \ + pcre2_set_offset_limit_32(G(a,32),b) + #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ if (test_mode == PCRE8_MODE) \ pcre2_set_parens_nest_limit_8(G(a,8),b); \ @@ -1467,6 +1478,12 @@ the three different cases. */ else \ G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b) + #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \ @@ -1665,6 +1682,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_8(G(a,8),b,c) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ @@ -1756,6 +1774,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_16(G(a,16),b,c) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ @@ -1847,6 +1866,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_32(G(a,32),b,c) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) +#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ @@ -3235,6 +3255,12 @@ for (;;) } /* Fall through */ + case MOD_SIZ: /* PCRE2_SIZE value */ + if (!isdigit(*pp)) goto INVALID_VALUE; + *((PCRE2_SIZE *)field) = (PCRE2_SIZE)strtoul((const char *)pp, &endptr, 10); + pp = (uint8_t *)endptr; + break; + case MOD_INT: /* Unsigned integer */ if (!isdigit(*pp)) goto INVALID_VALUE; *((uint32_t *)field) = (uint32_t)strtoul((const char *)pp, &endptr, 10); @@ -3431,7 +3457,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "", ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "", ((controls & CTL_BINCODE) != 0)? " bincode" : "", - ((controls & CTL_BSR_SET) != 0)? " bsr" : "", + ((controls & CTL_BSR_SET) != 0)? " bsr" : "", ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "", ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "", ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "", @@ -3446,7 +3472,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", ((controls & CTL_MARK) != 0)? " mark" : "", ((controls & CTL_MEMORY) != 0)? " memory" : "", - ((controls & CTL_NL_SET) != 0)? " newline" : "", + ((controls & CTL_NL_SET) != 0)? " newline" : "", ((controls & CTL_POSIX) != 0)? " posix" : "", ((controls & CTL_PUSH) != 0)? " push" : "", ((controls & CTL_STARTCHAR) != 0)? " startchar" : "", @@ -3473,7 +3499,7 @@ static void show_compile_options(uint32_t options, const char *before, const char *after) { if (options == 0) fprintf(outfile, "%s %s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", @@ -3499,6 +3525,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", ((options & PCRE2_UCP) != 0)? " ucp" : "", ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "", + ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "", ((options & PCRE2_UTF) != 0)? " utf" : "", after); } @@ -4401,7 +4428,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, msg); msg = ""; } - + if (local_newline_default != 0) prmsg(&msg, "#newline_default"); if (msg[0] == 0) fprintf(outfile, "\n"); @@ -6975,7 +7002,7 @@ while (notdone) skipping = FALSE; setlocale(LC_CTYPE, "C"); } - else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2]))) + else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2]))) rc = process_data(); } diff --git a/testdata/testinput2 b/testdata/testinput2 index 08da226..b55f051 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4515,4 +4515,22 @@ B)x/alt_verbnames,mark /(*:abc\Qpqr)/alt_verbnames +/abc/use_offset_limit + 1234abcde\=offset_limit=100 + 1234abcde\=offset_limit=9 + 1234abcde\=offset_limit=4 + 1234abcde\=offset_limit=4,offset=4 +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 + 1234abcde\=offset_limit=3 + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 +\= Expect no match + 1234abc\=offset_limit=6 + +/abc/ +\= Expect error + 1234abcde\=offset_limit=4 + # End of testinput2 diff --git a/testdata/testinput6 b/testdata/testinput6 index 9297b63..4299b87 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4868,4 +4868,18 @@ /^/gm,alt_circumflex \n\n\n +/abc/use_offset_limit + 1234abcde\=offset_limit=100 + 1234abcde\=offset_limit=9 + 1234abcde\=offset_limit=4 + 1234abcde\=offset_limit=4,offset=4 +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 + 1234abcde\=offset_limit=3 + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 +\= Expect no match + 1234abc\=offset_limit=6 + # End of testinput6 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 6abd7c4..d375165 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14633,4 +14633,31 @@ MK: A\x0aB /(*:abc\Qpqr)/alt_verbnames Failed: error 160 at offset 12: (*VERB) not recognized or malformed +/abc/use_offset_limit + 1234abcde\=offset_limit=100 + 0: abc + 1234abcde\=offset_limit=9 + 0: abc + 1234abcde\=offset_limit=4 + 0: abc + 1234abcde\=offset_limit=4,offset=4 + 0: abc +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 +No match + 1234abcde\=offset_limit=3 +No match + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 + 0: +\= Expect no match + 1234abc\=offset_limit=6 +No match + +/abc/ +\= Expect error + 1234abcde\=offset_limit=4 +Failed: error -56: offset limit set without PCRE2_USE_OFFSET_LIMIT + # End of testinput2 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index e793a41..be37604 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7655,4 +7655,26 @@ No match 0: 0: +/abc/use_offset_limit + 1234abcde\=offset_limit=100 + 0: abc + 1234abcde\=offset_limit=9 + 0: abc + 1234abcde\=offset_limit=4 + 0: abc + 1234abcde\=offset_limit=4,offset=4 + 0: abc +\= Expect no match + 1234abcde\=offset_limit=4,offset=5 +No match + 1234abcde\=offset_limit=3 +No match + +/(?<=abc)/use_offset_limit + 1234abc\=offset_limit=7 + 0: +\= Expect no match + 1234abc\=offset_limit=6 +No match + # End of testinput6