Keep recursion ovecsave blocks on a chain and re-use them.

This commit is contained in:
Philip.Hazel 2014-08-16 15:10:42 +00:00
parent 7efba85b56
commit ac8cbf8d6a
2 changed files with 86 additions and 50 deletions

View File

@ -682,14 +682,22 @@ typedef struct compile_block {
BOOL dupnames; /* Duplicate names exist */ BOOL dupnames; /* Duplicate names exist */
} compile_block; } compile_block;
/* Structure for keeping a chain of heap blocks used for saving ovectors
during pattern recursion when the ovector is larger than can be saved on
the system stack, or when the heap is being used for match() recursion.
Each block is obtained with enough room for the whole ovector: saved_ovec is
declared with a single element, but that element is only the first of a longer
vector that occupies the rest of the block. The recursion record points at
saved_ovec, not at the start of the block. When a recursion has finished with
its block, the block is put on a free chain in the match block so that a later
recursion can re-use it instead of calling the memory allocator again; whatever
is on the chain is freed once the matching loop has ended.
NOTE(review): the code that allocates a block and the code that gets back from
a saved_ovec pointer to its block both use sizeof(ovecsave_frame *) as the
offset of saved_ovec, that is, they assume there is no padding after the next
field. Confirm that this holds on every supported platform. */
typedef struct ovecsave_frame {
struct ovecsave_frame *next; /* Next frame on free chain */
PCRE2_SIZE saved_ovec[1]; /* First vector element */
} ovecsave_frame;
/* Structure for items in a linked list that represents an explicit recursive /* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_match(). */ call within the pattern; used by pcre_match(). */
typedef struct recursion_info { typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
unsigned int group_num; /* Number of group that was called */ unsigned int group_num; /* Number of group that was called */
PCRE2_SIZE *ovec_save; /* Pointer to start of saved ovector */ PCRE2_SIZE *ovec_save; /* Pointer to saved ovector frame */
uint32_t saved_max; /* Number of saved offsets */
uint32_t saved_capture_last; /* Last capture number */ uint32_t saved_capture_last; /* Last capture number */
PCRE2_SPTR subject_position; /* Position at start of recursion */ PCRE2_SPTR subject_position; /* Position at start of recursion */
} recursion_info; } recursion_info;
@ -758,6 +766,7 @@ typedef struct match_block {
PCRE2_UCHAR nl[4]; /* Newline string when fixed */ PCRE2_UCHAR nl[4]; /* Newline string when fixed */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
recursion_info *recursive; /* Linked list of recursion data */ recursion_info *recursive; /* Linked list of recursion data */
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
void *callout_data; /* To pass back to callouts */ void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */ int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
#ifdef HEAP_MATCH_RECURSE #ifdef HEAP_MATCH_RECURSE

View File

@ -343,7 +343,7 @@ typedef struct heapframe {
PCRE2_SPTR Xmstart; PCRE2_SPTR Xmstart;
PCRE2_SPTR Xcallpat; PCRE2_SPTR Xcallpat;
PCRE2_SPTR Xdata; PCRE2_SPTR Xdata;
PCRE2_SPTR Xnext; PCRE2_SPTR Xnext_ecode;
PCRE2_SPTR Xpp; PCRE2_SPTR Xpp;
PCRE2_SPTR Xprev; PCRE2_SPTR Xprev;
PCRE2_SPTR Xsaved_eptr; PCRE2_SPTR Xsaved_eptr;
@ -447,7 +447,7 @@ PCRE2_SIZE ovecsave[OP_RECURSE_STACK_SAVE_MAX];
/* Save the ovector */ /* Save the ovector */
new_recursive->ovec_save = ovecsave; new_recursive->ovec_save = ovecsave;
memcpy(ovecsave, mb->ovector, new_recursive->saved_max * sizeof(PCRE2_SIZE)); memcpy(ovecsave, mb->ovector, mb->offset_end * sizeof(PCRE2_SIZE));
/* Do the recursion. After processing each alternative, restore the ovector /* Do the recursion. After processing each alternative, restore the ovector
data and the last captured value. */ data and the last captured value. */
@ -458,7 +458,7 @@ do
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top, rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
mb, eptrb, rdepth + 1); mb, eptrb, rdepth + 1);
memcpy(mb->ovector, new_recursive->ovec_save, memcpy(mb->ovector, new_recursive->ovec_save,
new_recursive->saved_max * sizeof(PCRE2_SIZE)); mb->offset_end * sizeof(PCRE2_SIZE));
mb->capture_last = new_recursive->saved_capture_last; mb->capture_last = new_recursive->saved_capture_last;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc; if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc;
@ -606,7 +606,7 @@ HEAP_RECURSE:
#define callpat frame->Xcallpat #define callpat frame->Xcallpat
#define codelink frame->Xcodelink #define codelink frame->Xcodelink
#define data frame->Xdata #define data frame->Xdata
#define next frame->Xnext #define next_ecode frame->Xnext_ecode
#define pp frame->Xpp #define pp frame->Xpp
#define prev frame->Xprev #define prev frame->Xprev
#define saved_eptr frame->Xsaved_eptr #define saved_eptr frame->Xsaved_eptr
@ -654,7 +654,7 @@ PCRE2_SPTR charptr;
#endif #endif
PCRE2_SPTR callpat; PCRE2_SPTR callpat;
PCRE2_SPTR data; PCRE2_SPTR data;
PCRE2_SPTR next; PCRE2_SPTR next_ecode;
PCRE2_SPTR pp; PCRE2_SPTR pp;
PCRE2_SPTR prev; PCRE2_SPTR prev;
PCRE2_SPTR saved_eptr; PCRE2_SPTR saved_eptr;
@ -897,9 +897,9 @@ for (;;)
} }
if (rrc == MATCH_THEN) if (rrc == MATCH_THEN)
{ {
next = ecode + GET(ecode,1); next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next && if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next == OP_ALT)) (*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
} }
@ -1001,9 +1001,9 @@ for (;;)
if (rrc == MATCH_THEN) if (rrc == MATCH_THEN)
{ {
next = ecode + GET(ecode,1); next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next && if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next == OP_ALT)) (*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
} }
@ -1082,9 +1082,9 @@ for (;;)
if (rrc == MATCH_THEN) if (rrc == MATCH_THEN)
{ {
next = ecode + GET(ecode,1); next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next && if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next == OP_ALT)) (*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
} }
@ -1172,9 +1172,9 @@ for (;;)
if (rrc == MATCH_THEN) if (rrc == MATCH_THEN)
{ {
next = ecode + GET(ecode,1); next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next && if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next == OP_ALT)) (*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
} }
@ -1245,9 +1245,9 @@ for (;;)
if (rrc == MATCH_THEN) if (rrc == MATCH_THEN)
{ {
next = ecode + GET(ecode,1); next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next && if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next == OP_ALT)) (*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
} }
@ -1525,9 +1525,9 @@ for (;;)
if (rrc == MATCH_THEN) if (rrc == MATCH_THEN)
{ {
next = ecode + GET(ecode,1); next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next && if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next == OP_ALT)) (*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
} }
@ -1592,9 +1592,9 @@ for (;;)
THEN. */ THEN. */
case MATCH_THEN: case MATCH_THEN:
next = ecode + GET(ecode,1); next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next && if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next == OP_ALT)) (*ecode == OP_ALT || *next_ecode == OP_ALT))
{ {
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
break; break;
@ -1711,6 +1711,7 @@ for (;;)
case OP_RECURSE: case OP_RECURSE:
{ {
ovecsave_frame *fr;
recursion_info *ri; recursion_info *ri;
uint32_t recno; uint32_t recno;
@ -1729,7 +1730,6 @@ for (;;)
new_recursive.group_num = recno; new_recursive.group_num = recno;
new_recursive.saved_capture_last = mb->capture_last; new_recursive.saved_capture_last = mb->capture_last;
new_recursive.saved_max = mb->offset_end;
new_recursive.subject_position = eptr; new_recursive.subject_position = eptr;
new_recursive.prevrec = mb->recursive; new_recursive.prevrec = mb->recursive;
mb->recursive = &new_recursive; mb->recursive = &new_recursive;
@ -1744,7 +1744,7 @@ for (;;)
enough. */ enough. */
#ifndef HEAP_MATCH_RECURSE #ifndef HEAP_MATCH_RECURSE
if (new_recursive.saved_max <= OP_RECURSE_STACK_SAVE_MAX) if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
{ {
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb, rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
eptrb, rdepth); eptrb, rdepth);
@ -1761,14 +1761,25 @@ for (;;)
} }
#endif #endif
/* If the ovector is too big, or if we are using the heap for match() /* If the ovector is too big, or if we are using the heap for match()
recursion, we have to use the heap for saving the ovector. */ recursion, we have to use the heap for saving the ovector. Used ovecsave
frames are kept on a chain and re-used. This makes a small improvement in
execution time on Linux. */
new_recursive.ovec_save = (PCRE2_SIZE *) if (mb->ovecsave_chain != NULL)
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_SIZE), {
mb->memctl.memory_data)); new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
if (new_recursive.ovec_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY); mb->ovecsave_chain = mb->ovecsave_chain->next;
}
else
{
fr = (ovecsave_frame *)(mb->memctl.malloc(sizeof(ovecsave_frame *) +
mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
new_recursive.ovec_save = fr->saved_ovec;
}
memcpy(new_recursive.ovec_save, mb->ovector, memcpy(new_recursive.ovec_save, mb->ovector,
new_recursive.saved_max * sizeof(PCRE2_SIZE)); mb->offset_end * sizeof(PCRE2_SIZE));
/* Do the recursion. After processing each alternative, restore the /* Do the recursion. After processing each alternative, restore the
ovector data and the last captured value. This code has the same overall ovector data and the last captured value. This code has the same overall
@ -1783,13 +1794,16 @@ for (;;)
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
mb, eptrb, RM6); mb, eptrb, RM6);
memcpy(mb->ovector, new_recursive.ovec_save, memcpy(mb->ovector, new_recursive.ovec_save,
new_recursive.saved_max * sizeof(PCRE2_SIZE)); mb->offset_end * sizeof(PCRE2_SIZE));
mb->capture_last = new_recursive.saved_capture_last; mb->capture_last = new_recursive.saved_capture_last;
mb->recursive = new_recursive.prevrec; mb->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{ {
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data); fr = (ovecsave_frame *)
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
fr->next = mb->ovecsave_chain;
mb->ovecsave_chain = fr;
/* Set where we got to in the subject, and reset the start, in case /* Set where we got to in the subject, and reset the start, in case
it was changed by \K. This *is* propagated back out of a recursion, it was changed by \K. This *is* propagated back out of a recursion,
@ -1820,7 +1834,10 @@ for (;;)
RECURSION_RETURN: RECURSION_RETURN:
mb->recursive = new_recursive.prevrec; mb->recursive = new_recursive.prevrec;
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data); fr = (ovecsave_frame *)
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
fr->next = mb->ovecsave_chain;
mb->ovecsave_chain = fr;
RRETURN(rrc); RRETURN(rrc);
} }
@ -1841,25 +1858,25 @@ for (;;)
optional ones preceded by BRAZERO or BRAMINZERO. */ optional ones preceded by BRAZERO or BRAMINZERO. */
case OP_BRAZERO: case OP_BRAZERO:
next = ecode + 1; next_ecode = ecode + 1;
RMATCH(eptr, next, offset_top, mb, eptrb, RM10); RMATCH(eptr, next_ecode, offset_top, mb, eptrb, RM10);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
do next += GET(next, 1); while (*next == OP_ALT); do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
ecode = next + 1 + LINK_SIZE; ecode = next_ecode + 1 + LINK_SIZE;
break; break;
case OP_BRAMINZERO: case OP_BRAMINZERO:
next = ecode + 1; next_ecode = ecode + 1;
do next += GET(next, 1); while (*next == OP_ALT); do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
RMATCH(eptr, next + 1+LINK_SIZE, offset_top, mb, eptrb, RM11); RMATCH(eptr, next_ecode + 1+LINK_SIZE, offset_top, mb, eptrb, RM11);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode++; ecode++;
break; break;
case OP_SKIPZERO: case OP_SKIPZERO:
next = ecode+1; next_ecode = ecode+1;
do next += GET(next,1); while (*next == OP_ALT); do next_ecode += GET(next_ecode,1); while (*next_ecode == OP_ALT);
ecode = next + 1 + LINK_SIZE; ecode = next_ecode + 1 + LINK_SIZE;
break; break;
/* BRAPOSZERO occurs before a possessive bracket group. Don't do anything /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
@ -6173,7 +6190,7 @@ Undefine all the macros that were defined above to handle this. */
#undef callpat #undef callpat
#undef charptr #undef charptr
#undef data #undef data
#undef next #undef next_ecode
#undef pp #undef pp
#undef prev #undef prev
#undef saved_eptr #undef saved_eptr
@ -6425,6 +6442,7 @@ mb->poptions = re->overall_options; /* Pattern options */
mb->ignore_skip_arg = 0; mb->ignore_skip_arg = 0;
mb->mark = mb->nomatch_mark = NULL; /* In case never set */ mb->mark = mb->nomatch_mark = NULL; /* In case never set */
mb->recursive = NULL; /* No recursion at top level */ mb->recursive = NULL; /* No recursion at top level */
mb->ovecsave_chain = NULL; /* No ovecsave blocks yet */
mb->hitend = FALSE; mb->hitend = FALSE;
/* The name table is needed for finding all the numbers associated with a /* The name table is needed for finding all the numbers associated with a
@ -6890,6 +6908,15 @@ ENDLOOP:
release_match_heapframes(&frame_zero, mb); release_match_heapframes(&frame_zero, mb);
#endif #endif
/* Release any frames that were saved from recursions. */
while (mb->ovecsave_chain != NULL)
{
ovecsave_frame *this = mb->ovecsave_chain;
mb->ovecsave_chain = this->next;
mb->memctl.free(this, mb->memctl.memory_data);
}
/* Fill in fields that are always returned in the match data. */ /* Fill in fields that are always returned in the match data. */
match_data->code = re; match_data->code = re;