From 88db1be12340102c2567c7aa3305d415b2e2dbfb Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 12 Mar 2017 13:47:01 +0000 Subject: [PATCH] Change "limit recursion" to "limit depth" in code and build systems because the old name, which is retained as a synonym for the moment, is no longer appropriate. --- CMakeLists.txt | 6 ++-- ChangeLog | 11 +++++-- configure.ac | 57 ++++++++++++++++++++++----------- src/config.h.in | 13 ++++---- src/pcre2.h | 23 +++++++++----- src/pcre2.h.in | 23 +++++++++----- src/pcre2_compile.c | 13 ++++---- src/pcre2_config.c | 10 +++--- src/pcre2_context.c | 12 +++---- src/pcre2_dfa_match.c | 10 +++--- src/pcre2_error.c | 4 +-- src/pcre2_fuzzsupport.c | 6 ++-- src/pcre2_internal.h | 2 ++ src/pcre2_intmodedep.h | 8 ++--- src/pcre2_match.c | 10 +++--- src/pcre2_pattern_info.c | 14 ++++---- src/pcre2grep.c | 17 +++++----- src/pcre2test.c | 69 ++++++++++++++++++++-------------------- testdata/testinput15 | 14 ++++---- testdata/testinput6 | 4 +-- testdata/testoutput15 | 56 ++++++++++++++++---------------- testdata/testoutput6 | 6 ++-- 22 files changed, 217 insertions(+), 171 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab7d08a..9b89a9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,8 +146,8 @@ SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.") -SET(PCRE2_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING - "Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.") +SET(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT" CACHE STRING + "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details.") SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.") @@ -766,7 +766,7 @@ IF(PCRE2_SHOW_REPORT) MESSAGE(STATUS " Internal link size .............. 
: ${PCRE2_LINK_SIZE}") MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}") MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}") - MESSAGE(STATUS " Match limit recursion ........... : ${PCRE2_MATCH_LIMIT_RECURSION}") + MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}") MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") MESSAGE(STATUS " Build pcre2grep ................. : ${PCRE2_BUILD_PCRE2GREP}") diff --git a/ChangeLog b/ChangeLog index f908155..29f02fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -29,13 +29,20 @@ released code, but are noted here for the record. crash. A vector on the heap is now set up at the start of matching if the vector on the stack is not big enough to handle at least 10 frames. Fixes oss-fuzz issue 783. + +2. Now that pcre2_match() no longer uses recursive function calls (see above), +the "match limit recursion" value seems misnamed. It still exists, and limits +the depth of tree that is searched. To avoid future confusion, it has been +renamed as "depth limit" in all relevant places (--with-match-limit-depth, +(*LIMIT_DEPTH), pcre2_set_depth_limit(), etc) but the old names are still +available for backwards compatibility. -2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers: +3. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers: (a) Check for malloc failures when getting memory for the ovector (POSIX) or the match data block (non-POSIX). -3. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property +4. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property for a character with a code point greater than 0x10ffff (the Unicode maximum) caused a crash. 
diff --git a/configure.ac b/configure.ac index cb9bddb..bebf6d3 100644 --- a/configure.ac +++ b/configure.ac @@ -269,18 +269,22 @@ AC_ARG_WITH(match-limit, [default limit on internal looping (default=10000000)]), , with_match_limit=10000000) -# Handle --with-match-limit_recursion=N +# Handle --with-match-limit-depth=N +# Recognize old synonym --with-match-limit-recursion # -# Note: In config.h, the default is to define MATCH_LIMIT_RECURSION -# symbolically as MATCH_LIMIT, which in turn is defined to be some numeric -# value (e.g. 10000000). MATCH_LIMIT_RECURSION can otherwise be set to some -# different numeric value (or even the same numeric value as MATCH_LIMIT, -# though no longer defined in terms of the latter). +# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH symbolically as +# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g. +# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric +# value (or even the same numeric value as MATCH_LIMIT, though no longer +# defined in terms of the latter). # -AC_ARG_WITH(match-limit-recursion, - AS_HELP_STRING([--with-match-limit-recursion=N], - [default limit on internal recursion (default=MATCH_LIMIT)]), - , with_match_limit_recursion=MATCH_LIMIT) +AC_ARG_WITH(match-limit-depth, + AS_HELP_STRING([--with-match-limit-depth=N], + [default limit on match tree depth (default=MATCH_LIMIT)]), + , with_match_limit_depth=MATCH_LIMIT) + +AC_ARG_WITH(match-limit-recursion,, + , with_match_limit_recursion=UNSET) # Handle --enable-valgrind AC_ARG_ENABLE(valgrind, @@ -299,7 +303,7 @@ AC_ARG_ENABLE(fuzz_support, AS_HELP_STRING([--enable-fuzz-support], [enable fuzzer support]), , enable_fuzz_support=no) - + # Handle --disable-stack-for-recursion # This option became obsolete at release 10.30. AC_ARG_ENABLE(stack-for-recursion,, @@ -683,14 +687,29 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [ to determine that they do not match. 
The default is set very large so that it does not accidentally catch legitimate cases.]) -AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [ +# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth + +if test "$with_match_limit_recursion" != "UNSET"; then +cat <= 2 && break; case PSO_LIMM: - case PSO_LIMR: + case PSO_LIMD: c = 0; pp = skipatstart; if (!IS_DIGIT(ptr[pp])) @@ -9015,7 +9016,7 @@ while (patlen - skipatstart >= 2 && goto HAD_EARLY_ERROR; } if (p->type == PSO_LIMM) limit_match = c; - else limit_recursion = c; + else limit_depth = c; skipatstart += pp - skipatstart; break; } @@ -9258,7 +9259,7 @@ re->compile_options = options; re->overall_options = cb.external_options; re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; re->limit_match = limit_match; -re->limit_recursion = limit_recursion; +re->limit_depth = limit_depth; re->first_codeunit = 0; re->last_codeunit = 0; re->bsr_convention = bsr; diff --git a/src/pcre2_config.c b/src/pcre2_config.c index bd6bad8..73b21b8 100644 --- a/src/pcre2_config.c +++ b/src/pcre2_config.c @@ -87,9 +87,9 @@ if (where == NULL) /* Requests a length */ case PCRE2_CONFIG_JIT: case PCRE2_CONFIG_LINKSIZE: case PCRE2_CONFIG_MATCHLIMIT: + case PCRE2_CONFIG_DEPTHLIMIT: case PCRE2_CONFIG_NEWLINE: case PCRE2_CONFIG_PARENSLIMIT: - case PCRE2_CONFIG_RECURSIONLIMIT: case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */ case PCRE2_CONFIG_UNICODE: return sizeof(uint32_t); @@ -143,6 +143,10 @@ switch (what) *((uint32_t *)where) = MATCH_LIMIT; break; + case PCRE2_CONFIG_DEPTHLIMIT: + *((uint32_t *)where) = MATCH_LIMIT_DEPTH; + break; + case PCRE2_CONFIG_NEWLINE: *((uint32_t *)where) = NEWLINE_DEFAULT; break; @@ -151,10 +155,6 @@ switch (what) *((uint32_t *)where) = PARENS_NEST_LIMIT; break; - case PCRE2_CONFIG_RECURSIONLIMIT: - *((uint32_t *)where) = MATCH_LIMIT_RECURSION; - break; - /* This is now obsolete. 
The stack is no longer used via recursion for handling backtracking in pcre2_match(). */ diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 448b71a..653b2c3 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -169,7 +169,7 @@ const pcre2_match_context PRIV(default_match_context) = { NULL, PCRE2_UNSET, /* Offset limit */ MATCH_LIMIT, - MATCH_LIMIT_RECURSION }; + MATCH_LIMIT_DEPTH }; /* The create function copies the default into the new memory, but must override the default memory handling functions if a gcontext was provided. */ @@ -354,16 +354,16 @@ return 0; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit) +pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit) { -mcontext->offset_limit = limit; +mcontext->depth_limit = limit; return 0; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit) +pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit) { -mcontext->recursion_limit = limit; +mcontext->offset_limit = limit; return 0; } diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 471e159..72c6f18 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -400,7 +400,7 @@ BOOL utf = FALSE; BOOL reset_could_continue = FALSE; -if (rlevel++ > mb->match_limit_recursion) return PCRE2_ERROR_RECURSIONLIMIT; +if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT; offsetcount &= (uint32_t)(-2); /* Round down */ wscount -= 2; @@ 
-3215,7 +3215,7 @@ if (mcontext == NULL) { mb->callout = NULL; mb->memctl = re->memctl; - mb->match_limit_recursion = PRIV(default_match_context).recursion_limit; + mb->match_limit_depth = PRIV(default_match_context).depth_limit; } else { @@ -3228,10 +3228,10 @@ else mb->callout = mcontext->callout; mb->callout_data = mcontext->callout_data; mb->memctl = mcontext->memctl; - mb->match_limit_recursion = mcontext->recursion_limit; + mb->match_limit_depth = mcontext->depth_limit; } -if (mb->match_limit_recursion > re->limit_recursion) - mb->match_limit_recursion = re->limit_recursion; +if (mb->match_limit_depth > re->limit_depth) + mb->match_limit_depth = re->limit_depth; mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + re->name_count * re->name_entry_size; diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 437bdfd..9eab4fc 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -244,7 +244,7 @@ static const unsigned char match_error_texts[] = "non-unique substring name\0" "NULL argument passed\0" "nested recursion at the same subject position\0" - "recursion limit exceeded\0" + "matching depth limit exceeded\0" "requested value is not available\0" /* 55 */ "requested value is not set\0" diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index 462b48a..580475a 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -141,8 +141,8 @@ for (i = 0; i < 2; i++) int j; uint32_t save_match_options = match_options; - /* Create match data and context blocks only when we first need them. Set - low match and recursion limits to avoid wasting too much searching large + /* Create match data and context blocks only when we first need them. Set + low match and depth limits to avoid wasting too much searching large pattern trees. Almost all matches are going to fail. */ if (match_data == NULL) @@ -168,7 +168,7 @@ for (i = 0; i < 2; i++) return 0; } (void)pcre2_set_match_limit(match_context, 100); - (void)pcre2_set_recursion_limit(match_context, 100); + (void)pcre2_set_depth_limit(match_context, 100); (void)pcre2_set_callout(match_context, callout_function, &callout_count); } diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 654cc62..e516314 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -923,6 +923,7 @@ a positive value. 
*/ #define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)" #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" +#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_MARK "MARK" @@ -1196,6 +1197,7 @@ only. */ #define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN +#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_MARK STR_M STR_A STR_R STR_K diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 1ba8cf1..ec4aba1 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -586,7 +586,7 @@ typedef struct pcre2_real_match_context { void *callout_data; PCRE2_SIZE offset_limit; uint32_t match_limit; - uint32_t recursion_limit; + uint32_t depth_limit; } pcre2_real_match_context; /* The real compiled code structure. 
The type for the blocksize field is @@ -615,7 +615,7 @@ typedef struct pcre2_real_code { uint32_t overall_options; /* Options after processing the pattern */ uint32_t flags; /* Various state flags */ uint32_t limit_match; /* Limit set in the pattern */ - uint32_t limit_recursion; /* Limit set in the pattern */ + uint32_t limit_depth; /* Limit set in the pattern */ uint32_t first_codeunit; /* Starting code unit */ uint32_t last_codeunit; /* This codeunit must be seen */ uint16_t bsr_convention; /* What \R matches */ @@ -810,7 +810,7 @@ typedef struct match_block { heapframe *stack_frames; /* The original vector on the stack */ uint32_t match_call_count; /* Number of times a new frame is created */ uint32_t match_limit; /* As it says */ - uint32_t match_limit_recursion; /* As it says */ + uint32_t match_limit_depth; /* As it says */ BOOL hitend; /* Hit the end of the subject at some point */ BOOL hasthen; /* Pattern contains (*THEN) */ const uint8_t *lcc; /* Points to lower casing table */ @@ -856,7 +856,7 @@ typedef struct dfa_match_block { PCRE2_SPTR last_used_ptr; /* Latest consulted character */ const uint8_t *tables; /* Character tables */ PCRE2_SIZE start_offset; /* The start offset value */ - uint32_t match_limit_recursion; /* As it says */ + uint32_t match_limit_depth; /* As it says */ uint32_t moptions; /* Match options */ uint32_t poptions; /* Pattern options */ uint32_t nltype; /* Newline type */ diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 7667e1d..3b57629 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -548,7 +548,7 @@ Returns: MATCH_MATCH if matched ) these values are >= 0 MATCH_NOMATCH if failed to match ) negative MATCH_xxx value for PRUNE, SKIP, etc negative PCRE2_ERROR_xxx value if aborted by an error condition - (e.g. stopped by repeated call or recursion limit) + (e.g. stopped by repeated call or depth limit) */ static int @@ -708,7 +708,7 @@ recursive depth limit (used too many backtracking frames). 
If not, process the opcodes. */ if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT; -if (Frdepth >= mb->match_limit_recursion) return PCRE2_ERROR_RECURSIONLIMIT; +if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT; for (;;) { @@ -6272,9 +6272,9 @@ memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff, smaller. */ mb->match_limit = (mcontext->match_limit < re->limit_match)? - mcontext->match_limit : re->limit_match; -mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)? - mcontext->recursion_limit : re->limit_recursion; + mcontext->match_limit : re->limit_match; +mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)? + mcontext->depth_limit : re->limit_depth; /* Pointers to the individual character tables */ diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c index 5b32a90..98482f9 100644 --- a/src/pcre2_pattern_info.c +++ b/src/pcre2_pattern_info.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -75,6 +75,7 @@ if (where == NULL) /* Requests field length */ case PCRE2_INFO_BACKREFMAX: case PCRE2_INFO_BSR: case PCRE2_INFO_CAPTURECOUNT: + case PCRE2_INFO_DEPTHLIMIT: case PCRE2_INFO_FIRSTCODETYPE: case PCRE2_INFO_FIRSTCODEUNIT: case PCRE2_INFO_HASBACKSLASHC: @@ -89,7 +90,6 @@ if (where == NULL) /* Requests field length */ case PCRE2_INFO_NAMEENTRYSIZE: case PCRE2_INFO_NAMECOUNT: case PCRE2_INFO_NEWLINE: - case PCRE2_INFO_RECURSIONLIMIT: return sizeof(uint32_t); case PCRE2_INFO_FIRSTBITMAP: @@ -137,6 +137,11 @@ switch(what) *((uint32_t *)where) = re->top_bracket; break; + case PCRE2_INFO_DEPTHLIMIT: + *((uint32_t *)where) = re->limit_depth; + if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET; + break; + case PCRE2_INFO_FIRSTCODETYPE: *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 : ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0; @@ -215,11 +220,6 @@ switch(what) *((uint32_t *)where) = re->newline_convention; break; - case PCRE2_INFO_RECURSIONLIMIT: - *((uint32_t *)where) = re->limit_recursion; - if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET; - break; - case PCRE2_INFO_SIZE: *((size_t *)where) = re->blocksize; break; diff --git a/src/pcre2grep.c b/src/pcre2grep.c index e98d743..4e42278 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS. The header can be found in the special z/OS distribution, which is available from www.zaconsultants.net or from www.cbttape.org. 
- Copyright (c) 1997-2016 University of Cambridge + Copyright (c) 1997-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -211,7 +211,7 @@ static const uint8_t *character_tables = NULL; static uint32_t pcre2_options = 0; static uint32_t process_options = 0; static uint32_t match_limit = 0; -static uint32_t recursion_limit = 0; +static uint32_t depth_limit = 0; static pcre2_compile_context *compile_context; static pcre2_match_context *match_context; @@ -355,7 +355,7 @@ used to identify them. */ #define N_FOFFSETS (-11) #define N_LBUFFER (-12) #define N_M_LIMIT (-13) -#define N_M_LIMIT_REC (-14) +#define N_M_LIMIT_DEP (-14) #define N_BUFSIZE (-15) #define N_NOJIT (-16) #define N_FILE_LIST (-17) @@ -395,8 +395,9 @@ static option_item optionlist[] = { { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, - { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" }, - { OP_U32NUMBER, N_M_LIMIT_REC, &recursion_limit, "recursion-limit=number", "set PCRE match recursion limit option" }, + { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" }, + { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" }, + { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" }, { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, @@ -523,7 +524,7 @@ if (resource_error) { fprintf(stderr, "pcre2grep: Error %d, %d or %d means that 
a resource limit " "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, - PCRE2_ERROR_RECURSIONLIMIT); + PCRE2_ERROR_DEPTHLIMIT); fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n"); } exit(rc); @@ -1639,7 +1640,7 @@ for (i = 1; p != NULL; p = p->next, i++) fprintf(stderr, "%s", msg); FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */ fprintf(stderr, "\n\n"); - if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_RECURSIONLIMIT || + if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT) resource_error = TRUE; if (error_count++ > 20) @@ -3530,7 +3531,7 @@ if ((only_matching != NULL && (file_offsets || line_offsets)) || /* Put limits into the match data block. */ if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); -if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit); +if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit); if (only_matching != NULL || file_offsets || line_offsets) show_only_matching = TRUE; diff --git a/src/pcre2test.c b/src/pcre2test.c index 46daec5..8c6cd9d 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam. 
Written by Philip Hazel Original code Copyright (c) 1997-2012 University of Cambridge - Rewritten code Copyright (c) 2016 University of Cambridge + Rewritten code Copyright (c) 2016-2017 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -564,6 +564,7 @@ static modstruct modlist[] = { { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) }, + { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) }, { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) }, { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) }, @@ -619,7 +620,7 @@ static modstruct modlist[] = { { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) }, { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) }, - { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) }, + { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */ { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) }, { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, @@ -1185,6 +1186,14 @@ are supported. */ else \ pcre2_set_compile_recursion_guard_32(G(a,32),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) \ + if (test_mode == PCRE8_MODE) \ + pcre2_set_depth_limit_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + pcre2_set_depth_limit_16(G(a,16),b); \ + else \ + pcre2_set_depth_limit_32(G(a,32),b) + #define PCRE2_SET_MATCH_LIMIT(a,b) \ if (test_mode == PCRE8_MODE) \ pcre2_set_match_limit_8(G(a,8),b); \ @@ -1217,14 +1226,6 @@ are supported. 
*/ else \ pcre2_set_parens_nest_limit_32(G(a,32),b) -#define PCRE2_SET_RECURSION_LIMIT(a,b) \ - if (test_mode == PCRE8_MODE) \ - pcre2_set_recursion_limit_8(G(a,8),b); \ - else if (test_mode == PCRE16_MODE) \ - pcre2_set_recursion_limit_16(G(a,16),b); \ - else \ - pcre2_set_recursion_limit_32(G(a,32),b) - #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ if (test_mode == PCRE8_MODE) \ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ @@ -1620,6 +1621,12 @@ the three different cases. */ else \ G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \ + else \ + G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b) + #define PCRE2_SET_MATCH_LIMIT(a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \ @@ -1644,12 +1651,6 @@ the three different cases. */ else \ G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b) -#define PCRE2_SET_RECURSION_LIMIT(a,b) \ - if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - G(pcre2_set_recursion_limit_,BITONE)(G(a,BITONE),b); \ - else \ - G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b) - #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ @@ -1838,11 +1839,11 @@ the three different cases. 
*/ #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b) #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_8(G(a,8),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b) #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) -#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l) @@ -1934,11 +1935,11 @@ the three different cases. */ #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b) #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_16(G(a,16),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b) #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) -#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \ (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l) @@ -2030,11 +2031,11 @@ the three different cases. 
*/ #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b) #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_32(G(a,32),b,c) +#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b) #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) -#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \ (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l) @@ -3937,11 +3938,11 @@ if ((pat_patctl.control & CTL_INFO) != 0) { void *nametable; uint8_t *start_bits; - BOOL match_limit_set, recursion_limit_set; + BOOL match_limit_set, depth_limit_set; uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit, minlength, nameentrysize, namecount, newline_convention, - recursion_limit; + depth_limit; /* These info requests may return PCRE2_ERROR_UNSET. 
*/ @@ -3959,14 +3960,14 @@ if ((pat_patctl.control & CTL_INFO) != 0) return PR_ABEND; } - switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE)) + switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE)) { case 0: - recursion_limit_set = TRUE; + depth_limit_set = TRUE; break; case PCRE2_ERROR_UNSET: - recursion_limit_set = FALSE; + depth_limit_set = FALSE; break; default: @@ -4006,8 +4007,8 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (match_limit_set) fprintf(outfile, "Match limit = %u\n", match_limit); - if (recursion_limit_set) - fprintf(outfile, "Recursion limit = %u\n", recursion_limit); + if (depth_limit_set) + fprintf(outfile, "Depth limit = %u\n", depth_limit); if (namecount > 0) { @@ -5228,7 +5229,7 @@ return PR_OK; /************************************************* -* Check match or recursion limit * +* Check match or depth limit * *************************************************/ static int @@ -5240,7 +5241,7 @@ uint32_t mid = 64; uint32_t max = UINT32_MAX; PCRE2_SET_MATCH_LIMIT(dat_context, max); -PCRE2_SET_RECURSION_LIMIT(dat_context, max); +PCRE2_SET_DEPTH_LIMIT(dat_context, max); for (;;) { @@ -5250,7 +5251,7 @@ for (;;) } else { - PCRE2_SET_RECURSION_LIMIT(dat_context, mid); + PCRE2_SET_DEPTH_LIMIT(dat_context, mid); } if ((pat_patctl.control & CTL_JITFAST) != 0) @@ -6547,15 +6548,15 @@ else for (gmatched = 0;; gmatched++) (double)CLOCKS_PER_SEC); } - /* Find the match and recursion limits if requested. The recursion limit + /* Find the match and depth limits if requested. The depth limit is not relevant for JIT. 
*/ if ((dat_datctl.control & CTL_FINDLIMITS) != 0) { capcount = check_match_limit(pp, ulen, PCRE2_ERROR_MATCHLIMIT, "match"); if (FLD(compiled_code, executable_jit) == NULL) - (void)check_match_limit(pp, ulen, PCRE2_ERROR_RECURSIONLIMIT, - "recursion"); + (void)check_match_limit(pp, ulen, PCRE2_ERROR_DEPTHLIMIT, + "depth"); } /* Otherwise just run a single match, setting up a callout if required (the @@ -7285,8 +7286,8 @@ printf(" Internal link size = %d\n", optval); printf(" Parentheses nest limit = %d\n", optval); (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval); printf(" Default match limit = %d\n", optval); -(void)PCRE2_CONFIG(PCRE2_CONFIG_RECURSIONLIMIT, &optval); -printf(" Default recursion depth limit = %d\n", optval); +(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval); +printf(" Default depth limit = %d\n", optval); return 0; } diff --git a/testdata/testinput15 b/testdata/testinput15 index b1aaf78..f669e32 100644 --- a/testdata/testinput15 +++ b/testdata/testinput15 @@ -43,14 +43,14 @@ /(*LIMIT_MATCH=4294967290)abc/ -/(*LIMIT_RECURSION=4294967280)abc/I +/(*LIMIT_DEPTH=4294967280)abc/I /(a+)*zz/ aaaaaaaaaaaaaz aaaaaaaaaaaaaz\=match_limit=3000 /(a+)*zz/ - aaaaaaaaaaaaaz\=recursion_limit=10 + aaaaaaaaaaaaaz\=depth_limit=10 /(*LIMIT_MATCH=3000)(a+)*zz/I aaaaaaaaaaaaaz @@ -63,16 +63,16 @@ aaaaaaaaaaaaaz aaaaaaaaaaaaaz\=match_limit=3000 -/(*LIMIT_RECURSION=10)(a+)*zz/I +/(*LIMIT_DEPTH=10)(a+)*zz/I aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=recursion_limit=1000 + aaaaaaaaaaaaaz\=depth_limit=1000 -/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I aaaaaaaaaaaaaz -/(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=1000)(a+)*zz/I aaaaaaaaaaaaaz - aaaaaaaaaaaaaz\=recursion_limit=10 + aaaaaaaaaaaaaz\=depth_limit=10 # These three have infinitely nested recursions. 
diff --git a/testdata/testinput6 b/testdata/testinput6 index ee6cd1b..a7055a8 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4882,8 +4882,8 @@ aaa\=allcaptures a\=allcaptures -/(*LIMIT_RECURSION=100)^((.)(?1)|.)$/ -\= Expect recursion limit exceeded +/(*LIMIT_DEPTH=100)^((.)(?1)|.)$/ +\= Expect depth limit exceeded a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] # End of testinput6 diff --git a/testdata/testoutput15 b/testdata/testoutput15 index 60fab6c..68bbfac 100644 --- a/testdata/testoutput15 +++ b/testdata/testoutput15 @@ -13,12 +13,12 @@ Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits Minimum match limit = 7 -Minimum recursion limit = 7 +Minimum depth limit = 7 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaz\=find_limits Minimum match limit = 20481 -Minimum recursion limit = 30 +Minimum depth limit = 30 No match !((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I @@ -27,60 +27,60 @@ May match empty string Subject length lower bound = 0 /* this is a C style comment */\=find_limits Minimum match limit = 64 -Minimum recursion limit = 7 +Minimum depth limit = 7 0: /* this is a C style comment */ 1: /* this is a C style comment */ /^(?>a)++/ aa\=find_limits Minimum match limit = 5 -Minimum recursion limit = 3 +Minimum depth limit = 3 0: aa aaaaaaaaa\=find_limits Minimum match limit = 12 -Minimum recursion limit = 3 +Minimum depth limit = 3 0: aaaaaaaaa /(a)(?1)++/ aa\=find_limits Minimum match limit = 7 -Minimum recursion limit = 5 +Minimum depth limit = 5 0: aa 1: a aaaaaaaaa\=find_limits Minimum match limit = 21 -Minimum recursion limit = 5 +Minimum depth limit = 5 0: aaaaaaaaa 1: a /a(?:.)*?a/ims abbbbbbbbbbbbbbbbbbbbba\=find_limits Minimum match limit = 24 -Minimum recursion limit = 3 +Minimum 
depth limit = 3 0: abbbbbbbbbbbbbbbbbbbbba /a(?:.(*THEN))*?a/ims abbbbbbbbbbbbbbbbbbbbba\=find_limits Minimum match limit = 66 -Minimum recursion limit = 45 +Minimum depth limit = 45 0: abbbbbbbbbbbbbbbbbbbbba /a(?:.(*THEN:ABC))*?a/ims abbbbbbbbbbbbbbbbbbbbba\=find_limits Minimum match limit = 66 -Minimum recursion limit = 45 +Minimum depth limit = 45 0: abbbbbbbbbbbbbbbbbbbbba /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ aabbccddee\=find_limits Minimum match limit = 7 -Minimum recursion limit = 7 +Minimum depth limit = 7 0: aabbccddee /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ aabbccddee\=find_limits Minimum match limit = 12 -Minimum recursion limit = 12 +Minimum depth limit = 12 0: aabbccddee 1: aa 2: bb @@ -91,7 +91,7 @@ Minimum recursion limit = 12 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ aabbccddee\=find_limits Minimum match limit = 10 -Minimum recursion limit = 10 +Minimum depth limit = 10 0: aabbccddee 1: aa 2: cc @@ -103,9 +103,9 @@ Failed: error 160 at offset 17: (*VERB) not recognized or malformed /(*LIMIT_MATCH=4294967290)abc/ Failed: error 160 at offset 24: (*VERB) not recognized or malformed -/(*LIMIT_RECURSION=4294967280)abc/I +/(*LIMIT_DEPTH=4294967280)abc/I Capturing subpattern count = 0 -Recursion limit = 4294967280 +Depth limit = 4294967280 First code unit = 'a' Last code unit = 'c' Subject length lower bound = 3 @@ -117,8 +117,8 @@ No match Failed: error -47: match limit exceeded /(a+)*zz/ - aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -53: recursion limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 +Failed: error -53: matching depth limit exceeded /(*LIMIT_MATCH=3000)(a+)*zz/I Capturing subpattern count = 1 @@ -151,36 +151,36 @@ No match aaaaaaaaaaaaaz\=match_limit=3000 Failed: error -47: match limit exceeded -/(*LIMIT_RECURSION=10)(a+)*zz/I +/(*LIMIT_DEPTH=10)(a+)*zz/I Capturing subpattern count = 1 -Recursion limit = 10 +Depth limit = 10 Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaz -Failed: error -53: 
recursion limit exceeded - aaaaaaaaaaaaaz\=recursion_limit=1000 -Failed: error -53: recursion limit exceeded +Failed: error -53: matching depth limit exceeded + aaaaaaaaaaaaaz\=depth_limit=1000 +Failed: error -53: matching depth limit exceeded -/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I Capturing subpattern count = 1 -Recursion limit = 1000 +Depth limit = 1000 Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaz No match -/(*LIMIT_RECURSION=1000)(a+)*zz/I +/(*LIMIT_DEPTH=1000)(a+)*zz/I Capturing subpattern count = 1 -Recursion limit = 1000 +Depth limit = 1000 Starting code units: a z Last code unit = 'z' Subject length lower bound = 2 aaaaaaaaaaaaaz No match - aaaaaaaaaaaaaz\=recursion_limit=10 -Failed: error -53: recursion limit exceeded + aaaaaaaaaaaaaz\=depth_limit=10 +Failed: error -53: matching depth limit exceeded # These three have infinitely nested recursions. diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 7e20e52..edbf1bb 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7680,9 +7680,9 @@ No match ** Ignored after DFA matching: allcaptures 0: a -/(*LIMIT_RECURSION=100)^((.)(?1)|.)$/ -\= Expect recursion limit exceeded +/(*LIMIT_DEPTH=100)^((.)(?1)|.)$/ +\= Expect depth limit exceeded a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00] -Failed: error -53: recursion limit exceeded +Failed: error -53: matching depth limit exceeded # End of testinput6