diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic index 8adcede..a8f3a7e 100644 --- a/src/pcre2.h.generic +++ b/src/pcre2.h.generic @@ -672,6 +672,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ #define PCRE2_MATCH_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ *pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create_with_frames(uint32_t, pcre2_general_context *); \ PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ *pcre2_match_data_create_from_pattern(const pcre2_code *, \ pcre2_general_context *); \ @@ -868,6 +870,7 @@ pcre2_compile are called by application code. */ #define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) #define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) #define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) +#define pcre2_match_data_create_with_frames PCRE2_SUFFIX(pcre2_match_data_create_with_frames_) #define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) #define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_) diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 19bd29e..e56cbab 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -672,6 +672,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ #define PCRE2_MATCH_FUNCTIONS \ PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ *pcre2_match_data_create(uint32_t, pcre2_general_context *); \ +PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ + *pcre2_match_data_create_with_frames(uint32_t, pcre2_general_context *); \ PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \ *pcre2_match_data_create_from_pattern(const pcre2_code *, \ pcre2_general_context *); \ @@ -868,6 +870,7 @@ pcre2_compile are called by application code. 
*/ #define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_) #define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_) #define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_) +#define pcre2_match_data_create_with_frames PCRE2_SUFFIX(pcre2_match_data_create_with_frames_) #define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_) #define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_) #define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_) diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index f8a3d25..ed2b9fb 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -615,6 +615,8 @@ here.) */ #undef LOOKBEHIND_MAX #define LOOKBEHIND_MAX UINT16_MAX +struct heapframe; /* Forward declaration; the structure is defined below */ + typedef struct pcre2_real_code { pcre2_memctl memctl; /* Memory control fields */ const uint8_t *tables; /* The character tables */ @@ -661,6 +663,7 @@ typedef struct pcre2_real_match_data { uint8_t flags; /* Various flags */ uint16_t oveccount; /* Number of pairs */ int rc; /* The return code from the match */ + struct heapframe *start_frames; /* Initial frames held inside this block, or NULL to make pcre2_match() use stack frames */ PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ } pcre2_real_match_data; diff --git a/src/pcre2_match.c b/src/pcre2_match.c index efab7d0..40e58ae 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -6268,35 +6268,10 @@ switch (Freturn_id) #undef LBL } - -/************************************************* -* Match a Regular Expression * -*************************************************/ - -/* This function applies a compiled pattern to a subject string and picks out -portions of the string if it matches. Two elements in the vector are set for -each substring: the offsets to the start and end of the substring. 
- -Arguments: - code points to the compiled expression - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - match_data points to a match_data block - mcontext points a PCRE2 context - -Returns: > 0 => success; value is the number of ovector pairs filled - = 0 => success, but ovector is not big enough - = -1 => failed to match (PCRE2_ERROR_NOMATCH) - = -2 => partial match (PCRE2_ERROR_PARTIAL) - < -2 => some kind of unexpected problem -*/ - -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, +static int +match_start(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, - pcre2_match_context *mcontext) + pcre2_match_context *mcontext, heapframe *start_frames) { int rc; int was_zero_terminated = 0; @@ -6354,24 +6329,7 @@ pcre2_callout_block cb; match_block actual_match_block; match_block *mb = &actual_match_block; -/* Allocate an initial vector of backtracking frames on the stack. If this -proves to be too small, it is replaced by a larger one on the heap. To get a -vector of the size required that is aligned for pointers, allocate it as a -vector of pointers. */ - -PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)] - PCRE2_KEEP_UNINITIALIZED; -mb->stack_frames = (heapframe *)stack_frames_vector; - -/* Recognize NULL, length 0 as an empty string. 
*/ - -if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; - -/* Plausibility checks */ - -if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; -if (code == NULL || subject == NULL || match_data == NULL) - return PCRE2_ERROR_NULL; +mb->stack_frames = start_frames; /* initial frames supplied by the caller */ start_match = subject + start_offset; req_cu_ptr = start_match - 1; @@ -7533,6 +7491,95 @@ else match_data->rc = PCRE2_ERROR_NOMATCH; return match_data->rc; } +#if defined(__GNUC__) /* Works for clang/ICC too */ +#define MATCH_START_ON_STACK_NOT_INLINABLE 1 +#define MATCH_START_ON_STACK_NOINLINE __attribute__ ((noinline)) +#elif defined(_MSC_VER) +#define MATCH_START_ON_STACK_NOT_INLINABLE 1 +#define MATCH_START_ON_STACK_NOINLINE __declspec(noinline) +#else /* no noinline attribute known: pcre2_match() calls through a volatile function pointer instead */ +#define MATCH_START_ON_STACK_NOT_INLINABLE 0 +#define MATCH_START_ON_STACK_NOINLINE +#endif +/* Runs match_start() with the initial frames vector declared as a local of this function, so that pcre2_match() itself does not carry it. */ +static MATCH_START_ON_STACK_NOINLINE int +match_start_on_stack(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options, + pcre2_match_data *match_data, pcre2_match_context *mcontext) +{ +/* Allocate an initial vector of backtracking frames on the stack. If this +proves to be too small, it is replaced by a larger one on the heap. To get a +vector of the size required that is aligned for pointers, allocate it as a +vector of pointers. */ + +PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)] + PCRE2_KEEP_UNINITIALIZED; + +return match_start(code, subject, length, start_offset, options, match_data, + mcontext, (heapframe *)stack_frames_vector); +} + +/************************************************* +* Match a Regular Expression * +*************************************************/ + +/* This function applies a compiled pattern to a subject string and picks out +portions of the string if it matches. Two elements in the vector are set for +each substring: the offsets to the start and end of the substring. 
+ +Arguments: + code points to the compiled expression + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + match_data points to a match_data block + mcontext points a PCRE2 context + +Returns: > 0 => success; value is the number of ovector pairs filled + = 0 => success, but ovector is not big enough + = -1 => failed to match (PCRE2_ERROR_NOMATCH) + = -2 => partial match (PCRE2_ERROR_PARTIAL) + < -2 => some kind of unexpected problem +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, + PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext) +{ +/* Recognize NULL, length 0 as an empty string. */ + +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; +if (code == NULL || subject == NULL || match_data == NULL) + return PCRE2_ERROR_NULL; + +/* Use stack frames only if match_data does not provide its own. The stack + * vector is declared in a separate, non-inlined function so that it is not + * allocated here when pcre2_match_data_create_with_frames() was used. */ +if (match_data->start_frames == NULL) { +#if MATCH_START_ON_STACK_NOT_INLINABLE /* noinline is guaranteed by a compiler attribute */ + return match_start_on_stack(code, subject, length, start_offset, + options, match_data, mcontext); +#else /* call through a volatile function pointer to keep the call from being inlined */ + int (*volatile fn)(const pcre2_code*, PCRE2_SPTR, PCRE2_SIZE, + PCRE2_SIZE, uint32_t, pcre2_match_data*, + pcre2_match_context*) = &match_start_on_stack; + return (*fn)(code, subject, length, start_offset, options, match_data, + mcontext); +#endif +} + +/* Use initial heap frames from match_data. 
*/ +return match_start(code, subject, length, start_offset, + options, match_data, mcontext, + match_data->start_frames); +} + /* These #undefs are here to enable unity builds with CMake. */ #undef NLBLOCK /* Block containing newline information */ diff --git a/src/pcre2_match_data.c b/src/pcre2_match_data.c index 53e4698..e709db6 100644 --- a/src/pcre2_match_data.c +++ b/src/pcre2_match_data.c @@ -45,6 +45,36 @@ POSSIBILITY OF SUCH DAMAGE. #include "pcre2_internal.h" +static inline PCRE2_SIZE /* byte size of the ovector rounded up to HEAPFRAME_ALIGNMENT; the mask form assumes that value is a power of two */ +start_frames_offset(uint32_t oveccount) +{ + return ((2*oveccount*sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) & + ~(HEAPFRAME_ALIGNMENT - 1)); +} +/* Allocates a match data block. When with_frames is nonzero the block is enlarged by START_FRAMES_SIZE bytes and start_frames points at that area, which begins start_frames_offset(oveccount) bytes after the start of the ovector; otherwise start_frames is NULL. Returns NULL if the allocation fails. */ +static inline pcre2_match_data * +match_data_create(uint32_t oveccount, int with_frames, + pcre2_general_context *gcontext) +{ +pcre2_match_data *yield; +PCRE2_SIZE ovecsize; +if (oveccount < 1) oveccount = 1; /* a minimum of one ovector pair is imposed */ +if (with_frames) + ovecsize = start_frames_offset(oveccount) + START_FRAMES_SIZE; +else + ovecsize = 2*oveccount*sizeof(PCRE2_SIZE); +yield = PRIV(memctl_malloc)( + offsetof(pcre2_match_data, ovector) + ovecsize, (pcre2_memctl *)gcontext); +if (yield == NULL) return NULL; +yield->oveccount = oveccount; /* NOTE(review): the field is uint16_t, so a count above 65535 would be truncated here - confirm callers never pass one */ +if (with_frames) + yield->start_frames = (heapframe *) + ((char *)yield->ovector + ovecsize - START_FRAMES_SIZE); +else + yield->start_frames = NULL; /* use stack frames from pcre2_match() */ +yield->flags = 0; +return yield; +} /************************************************* @@ -56,15 +86,22 @@ POSSIBILITY OF SUCH DAMAGE. 
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext) { -pcre2_match_data *yield; -if (oveccount < 1) oveccount = 1; -yield = PRIV(memctl_malloc)( - offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE), - (pcre2_memctl *)gcontext); -if (yield == NULL) return NULL; -yield->oveccount = oveccount; -yield->flags = 0; -return yield; + return match_data_create(oveccount, 0, gcontext); +} + + + +/************************************************* +* Create a match data block with start frames * +*************************************************/ + +/* Like pcre2_match_data_create(), but the block also contains the initial vector of backtracking frames, so pcre2_match() does not have to allocate that vector on the stack. A minimum of 1 is imposed on the number of ovector pairs. */ + +PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION +pcre2_match_data_create_with_frames(uint32_t oveccount, + pcre2_general_context *gcontext) +{ + return match_data_create(oveccount, 1, gcontext); } @@ -79,9 +116,9 @@ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION pcre2_match_data_create_from_pattern(const pcre2_code *code, pcre2_general_context *gcontext) { +uint32_t oveccount = ((pcre2_real_code *)code)->top_bracket + 1; if (gcontext == NULL) gcontext = (pcre2_general_context *)code; -return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, - gcontext); +return match_data_create(oveccount, 0, gcontext); } @@ -160,7 +197,9 @@ PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION pcre2_get_match_data_size(pcre2_match_data *match_data) { return offsetof(pcre2_match_data, ovector) + - 2 * (match_data->oveccount) * sizeof(PCRE2_SIZE); + (match_data->start_frames + ? start_frames_offset(match_data->oveccount) + START_FRAMES_SIZE + : 2 * match_data->oveccount * sizeof(PCRE2_SIZE)); } /* End of pcre2_match_data.c */ diff --git a/src/pcre2test.c b/src/pcre2test.c index 00516fb..7c3e8b0 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -1248,6 +1248,14 @@ are supported. 
*/ else \ G(a,32) = pcre2_match_data_create_32(b,c) +#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) /* picks the 8-, 16- or 32-bit function according to test_mode */ \ + if (test_mode == PCRE8_MODE) \ + G(a,8) = pcre2_match_data_create_with_frames_8(b,c); \ + else if (test_mode == PCRE16_MODE) \ + G(a,16) = pcre2_match_data_create_with_frames_16(b,c); \ + else \ + G(a,32) = pcre2_match_data_create_with_frames_32(b,c) + #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ if (test_mode == PCRE8_MODE) \ G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \ @@ -1766,6 +1774,12 @@ the three different cases. */ else \ G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c) +#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) /* picks the BITONE or BITTWO function according to test_mode */ \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + G(a,BITONE) = G(pcre2_match_data_create_with_frames_,BITONE)(b,c); \ + else \ + G(a,BITTWO) = G(pcre2_match_data_create_with_frames_,BITTWO)(b,c) + #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \ @@ -2071,6 +2085,8 @@ the three different cases. 
*/ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c) +#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) /* always the 8-bit function */ \ + G(a,8) = pcre2_match_data_create_with_frames_8(b,c) #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c) #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8)) @@ -2178,6 +2194,8 @@ the three different cases. */ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c) +#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) /* always the 16-bit function */ \ + G(a,16) = pcre2_match_data_create_with_frames_16(b,c) #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c) #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16)) @@ -2285,6 +2303,8 @@ the three different cases. */ #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c) +#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) /* always the 32-bit function */ \ + G(a,32) = pcre2_match_data_create_with_frames_32(b,c) #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32)) @@ -7287,7 +7307,7 @@ else { max_oveccount = dat_datctl.oveccount; PCRE2_MATCH_DATA_FREE(match_data); - PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL); + PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(match_data, max_oveccount, NULL); /* replacement block carries its own start frames */ } if (CASTVAR(void *, match_data) == NULL) @@ -9170,7 +9190,7 @@ max_oveccount = DEFAULT_OVECCOUNT; G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \ G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \ G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \ - G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS)) + G(match_data,BITS) = G(pcre2_match_data_create_with_frames_,BITS)(max_oveccount, G(general_context,BITS)) #define CONTEXTTESTS \ (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \