Provide pcre2_match_data_create_with_frames() to avoid allocating start frames on the stack.

pcre2_match() uses START_FRAMES_SIZE (20KiB) of stack space for the start
(initial) frames, which may be too large for systems with small per-thread
stacks (e.g. 128KiB by default on Alpine Linux).

Provide pcre2_match_data_create_with_frames() to allow the user to create
a match_data block containing context/heap-allocated start frames, which
pcre2_match() will use, when available, instead of the stack frames.

* src/pcre2.h.generic, src/pcre2.h.in:
  Declare pcre2_match_data_create_with_frames().

* src/pcre2_intmodedep.h:
  Add a start_frames pointer to struct pcre2_real_match_data; it is NULL
  if stack frames are to be used.

* src/pcre2_match_data.c:
  Provide a common match_data_create() helper to be called by all the
  pcre2_match_data_create*() functions without duplicating code.
  When called from pcre2_match_data_create_with_frames(), match_data_create()
  will allocate the start frames in (at the end of) the ovector and point
  match_data->start_frames to that.

* src/pcre2_match.c:
  Split pcre2_match() into two helpers, match_start() and
  match_start_on_stack(): the former does the usual work given some start
  frames, the latter calls the former with start frames reserved on the stack.
  pcre2_match() can then call one or the other depending on whether
  match_data->start_frames exists or not, playing some noinline/indirection
  games so that the stack allocation happens in match_start_on_stack() only.
This commit is contained in:
ylavic 2022-07-10 15:51:11 +02:00
parent 3e52db5209
commit a7057201ca
6 changed files with 175 additions and 60 deletions

View File

@ -672,6 +672,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_with_frames(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
pcre2_general_context *); \
@ -868,6 +870,7 @@ pcre2_compile are called by application code. */
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_with_frames PCRE2_SUFFIX(pcre2_match_data_create_with_frames_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)

View File

@ -672,6 +672,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_with_frames(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
pcre2_general_context *); \
@ -868,6 +870,7 @@ pcre2_compile are called by application code. */
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_with_frames PCRE2_SUFFIX(pcre2_match_data_create_with_frames_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)

View File

@ -615,6 +615,8 @@ here.) */
#undef LOOKBEHIND_MAX
#define LOOKBEHIND_MAX UINT16_MAX
struct heapframe; /* see below */
typedef struct pcre2_real_code {
pcre2_memctl memctl; /* Memory control fields */
const uint8_t *tables; /* The character tables */
@ -661,6 +663,7 @@ typedef struct pcre2_real_match_data {
uint8_t flags; /* Various flags */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
struct heapframe *start_frames; /* Initial heap frames (NULL for stack) */
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
} pcre2_real_match_data;

View File

@ -6268,35 +6268,10 @@ switch (Freturn_id)
#undef LBL
}
/*************************************************
* Match a Regular Expression *
*************************************************/
/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.
Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points a PCRE2 context
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
= -1 => failed to match (PCRE2_ERROR_NOMATCH)
= -2 => partial match (PCRE2_ERROR_PARTIAL)
< -2 => some kind of unexpected problem
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
static int
match_start(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext)
pcre2_match_context *mcontext, heapframe *start_frames)
{
int rc;
int was_zero_terminated = 0;
@ -6354,24 +6329,7 @@ pcre2_callout_block cb;
match_block actual_match_block;
match_block *mb = &actual_match_block;
/* Allocate an initial vector of backtracking frames on the stack. If this
proves to be too small, it is replaced by a larger one on the heap. To get a
vector of the size required that is aligned for pointers, allocate it as a
vector of pointers. */
PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
PCRE2_KEEP_UNINITIALIZED;
mb->stack_frames = (heapframe *)stack_frames_vector;
/* Recognize NULL, length 0 as an empty string. */
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
/* Plausibility checks */
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (code == NULL || subject == NULL || match_data == NULL)
return PCRE2_ERROR_NULL;
mb->stack_frames = start_frames;
start_match = subject + start_offset;
req_cu_ptr = start_match - 1;
@ -7533,6 +7491,95 @@ else match_data->rc = PCRE2_ERROR_NOMATCH;
return match_data->rc;
}
/* match_start_on_stack() must not be inlined into pcre2_match(): if it were,
its frames vector would be reserved on pcre2_match()'s own stack even when
match_data provides the start frames. Select the compiler's "noinline"
spelling, and record in MATCH_START_ON_STACK_NOT_INLINABLE whether one is
available so that pcre2_match() can fall back to an indirect call. */

#if defined(__GNUC__) /* Works for clang/ICC too */
#define MATCH_START_ON_STACK_NOT_INLINABLE 1
#define MATCH_START_ON_STACK_NOINLINE __attribute__ ((noinline))
#elif defined(_MSC_VER)
#define MATCH_START_ON_STACK_NOT_INLINABLE 1
#define MATCH_START_ON_STACK_NOINLINE __declspec(noinline)
#else
#define MATCH_START_ON_STACK_NOT_INLINABLE 0
#define MATCH_START_ON_STACK_NOINLINE
#endif

/* Run match_start() with the initial backtracking frames held in a local
vector of START_FRAMES_SIZE bytes. The first seven arguments are handed to
match_start() unchanged and its result is returned as is. */

static MATCH_START_ON_STACK_NOINLINE int
match_start_on_stack(const pcre2_code *code, PCRE2_SPTR subject,
  PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options,
  pcre2_match_data *match_data, pcre2_match_context *mcontext)
{
/* The vector is declared as an array of pointers so that it has the required
size and is aligned for pointers. Should it prove too small, it is replaced by
a larger one on the heap. */

PCRE2_SPTR frames_storage[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
  PCRE2_KEEP_UNINITIALIZED;
heapframe *initial_frames = (heapframe *)frames_storage;

return match_start(code, subject, length, start_offset, options, match_data,
  mcontext, initial_frames);
}
/*************************************************
* Match a Regular Expression *
*************************************************/
/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.
Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points to a PCRE2 match context
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
= -1 => failed to match (PCRE2_ERROR_NOMATCH)
= -2 => partial match (PCRE2_ERROR_PARTIAL)
< -2 => some kind of unexpected problem
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
  pcre2_match_context *mcontext)
{
/* A NULL subject with a zero length is accepted as the empty string. */

if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";

/* Plausibility checks: unknown option bits are rejected first, then missing
mandatory arguments. */

if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (code == NULL || subject == NULL || match_data == NULL)
  return PCRE2_ERROR_NULL;

/* When match_data carries its own start frames (it was created by
pcre2_match_data_create_with_frames()), use them directly; nothing is then
reserved on the stack for the initial frames. */

if (match_data->start_frames != NULL)
  return match_start(code, subject, length, start_offset, options,
    match_data, mcontext, match_data->start_frames);

/* Otherwise the initial frames live on the stack. They are reserved inside
match_start_on_stack(), which must not be inlined here, so that the space is
only consumed on this path. */

#if MATCH_START_ON_STACK_NOT_INLINABLE /* guaranteed by the compiler */
return match_start_on_stack(code, subject, length, start_offset,
  options, match_data, mcontext);
#else /* no noinline attribute: call through a volatile pointer instead */
  {
  int (*volatile stack_entry)(const pcre2_code*, PCRE2_SPTR, PCRE2_SIZE,
    PCRE2_SIZE, uint32_t, pcre2_match_data*,
    pcre2_match_context*) = &match_start_on_stack;
  return (*stack_entry)(code, subject, length, start_offset, options,
    match_data, mcontext);
  }
#endif
}
/* These #undefs are here to enable unity builds with CMake. */
#undef NLBLOCK /* Block containing newline information */

View File

@ -45,6 +45,36 @@ POSSIBILITY OF SUCH DAMAGE.
#include "pcre2_internal.h"
/* Return the byte offset, measured from the start of the ovector, at which
the start frames begin: the size of an ovector of oveccount pairs rounded up
to a multiple of HEAPFRAME_ALIGNMENT. The mask arithmetic assumes that
HEAPFRAME_ALIGNMENT is a power of two. */

static inline PCRE2_SIZE
start_frames_offset(uint32_t oveccount)
{
const PCRE2_SIZE align_mask = HEAPFRAME_ALIGNMENT - 1;
const PCRE2_SIZE ovector_bytes = 2*oveccount*sizeof(PCRE2_SIZE);
return (ovector_bytes + align_mask) & ~align_mask;
}
/* Common worker behind the pcre2_match_data_create*() functions.

Arguments:
  oveccount    requested number of ovector pairs; forced into 1..UINT16_MAX
  with_frames  non-zero to reserve START_FRAMES_SIZE bytes of start frames
                 after the ovector, inside the same memory block
  gcontext     general context, passed through to PRIV(memctl_malloc)

Returns:       the new match data block, or NULL if the allocation failed
*/

static inline pcre2_match_data *
match_data_create(uint32_t oveccount, int with_frames,
  pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
PCRE2_SIZE ovecsize;

/* The count is kept in a uint16_t field of the match data block, so clamp it
before it is used for sizing. Without the upper bound a larger request would
be truncated when stored, the stored count would no longer describe the
allocated block (pcre2_get_match_data_size() recomputes the layout from it),
and 2*oveccount could wrap in 32-bit arithmetic. */

if (oveccount < 1) oveccount = 1;
if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;

/* With start frames, the ovector is padded up to the frame alignment and the
frames follow it; otherwise only the ovector itself is needed. */

if (with_frames)
  ovecsize = start_frames_offset(oveccount) + START_FRAMES_SIZE;
else
  ovecsize = 2*oveccount*sizeof(PCRE2_SIZE);

yield = PRIV(memctl_malloc)(
  offsetof(pcre2_match_data, ovector) + ovecsize, (pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;

yield->oveccount = oveccount;

/* The start frames occupy the last START_FRAMES_SIZE bytes of the block. */

if (with_frames)
  yield->start_frames = (heapframe *)
    ((char *)yield->ovector + ovecsize - START_FRAMES_SIZE);
else
  yield->start_frames = NULL;  /* use stack frames from pcre2_match() */

yield->flags = 0;
return yield;
}
/*************************************************
@ -56,15 +86,22 @@ POSSIBILITY OF SUCH DAMAGE.
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
if (oveccount < 1) oveccount = 1;
yield = PRIV(memctl_malloc)(
offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
(pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;
yield->oveccount = oveccount;
yield->flags = 0;
return yield;
return match_data_create(oveccount, 0, gcontext);
}
/*************************************************
*  Create a match data block with start frames   *
*************************************************/
/* Like pcre2_match_data_create(), but the block is created with start frames
included (match_data_create() is called with with_frames set), so that
pcre2_match() can use them instead of reserving its initial frames on the
stack. A minimum of 1 is imposed on the number of ovector pairs.

Arguments:
  oveccount   number of ovector pairs
  gcontext    general context, passed on to match_data_create()

Returns:      whatever match_data_create() returns
*/
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_with_frames(uint32_t oveccount,
pcre2_general_context *gcontext)
{
return match_data_create(oveccount, 1, gcontext);
}
@ -79,9 +116,9 @@ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
pcre2_general_context *gcontext)
{
uint32_t oveccount = ((pcre2_real_code *)code)->top_bracket + 1;
if (gcontext == NULL) gcontext = (pcre2_general_context *)code;
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
gcontext);
return match_data_create(oveccount, 0, gcontext);
}
@ -160,7 +197,9 @@ PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_match_data_size(pcre2_match_data *match_data)
{
return offsetof(pcre2_match_data, ovector) +
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
(match_data->start_frames
? start_frames_offset(match_data->oveccount) + START_FRAMES_SIZE
: 2 * match_data->oveccount * sizeof(PCRE2_SIZE));
}
/* End of pcre2_match_data.c */

View File

@ -1248,6 +1248,14 @@ are supported. */
else \
G(a,32) = pcre2_match_data_create_32(b,c)
/* Create match data with start frames for the code unit width selected by
test_mode (8-bit, 16-bit, otherwise 32-bit), assigning the result to the
width-specific variant of a. The expansion is a bare if/else chain with no
enclosing braces, so use it only as a complete statement. */
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
if (test_mode == PCRE8_MODE) \
G(a,8) = pcre2_match_data_create_with_frames_8(b,c); \
else if (test_mode == PCRE16_MODE) \
G(a,16) = pcre2_match_data_create_with_frames_16(b,c); \
else \
G(a,32) = pcre2_match_data_create_with_frames_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
if (test_mode == PCRE8_MODE) \
G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
@ -1766,6 +1774,12 @@ the three different cases. */
else \
G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c)
/* Two-width variant: create match data with start frames using the BITONE
function when test_mode is the BITONE mode, and the BITTWO function
otherwise. The expansion is a bare if/else with no enclosing braces, so use
it only as a complete statement. */
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(a,BITONE) = G(pcre2_match_data_create_with_frames_,BITONE)(b,c); \
else \
G(a,BITTWO) = G(pcre2_match_data_create_with_frames_,BITTWO)(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \
@ -2071,6 +2085,8 @@ the three different cases. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
G(a,8) = pcre2_match_data_create_with_frames_8(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
@ -2178,6 +2194,8 @@ the three different cases. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
G(a,16) = pcre2_match_data_create_with_frames_16(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
@ -2285,6 +2303,8 @@ the three different cases. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
G(a,32) = pcre2_match_data_create_with_frames_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
@ -7287,7 +7307,7 @@ else
{
max_oveccount = dat_datctl.oveccount;
PCRE2_MATCH_DATA_FREE(match_data);
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(match_data, max_oveccount, NULL);
}
if (CASTVAR(void *, match_data) == NULL)
@ -9170,7 +9190,7 @@ max_oveccount = DEFAULT_OVECCOUNT;
G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
G(match_data,BITS) = G(pcre2_match_data_create_with_frames_,BITS)(max_oveccount, G(general_context,BITS))
#define CONTEXTTESTS \
(void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \