Keep recursion ovecsave blocks on a chain and re-use them.
This commit is contained in:
parent
7efba85b56
commit
ac8cbf8d6a
|
@ -682,14 +682,22 @@ typedef struct compile_block {
|
||||||
BOOL dupnames; /* Duplicate names exist */
|
BOOL dupnames; /* Duplicate names exist */
|
||||||
} compile_block;
|
} compile_block;
|
||||||
|
|
||||||
|
/* Structure for keeping a chain of heap blocks used for saving ovectors
|
||||||
|
during pattern recursion when the ovector is larger than can be saved on
|
||||||
|
the system stack. */
|
||||||
|
|
||||||
|
typedef struct ovecsave_frame {
|
||||||
|
struct ovecsave_frame *next; /* Next frame on the free chain headed by mb->ovecsave_chain */
|
||||||
|
PCRE2_SIZE saved_ovec[1]; /* First of mb->offset_end saved elements (block is allocated oversize); the OP_RECURSE code recovers the frame by subtracting sizeof(ovecsave_frame *) from this address, so it must stay immediately after 'next' */
|
||||||
|
} ovecsave_frame;
|
||||||
|
|
||||||
/* Structure for items in a linked list that represents an explicit recursive
|
/* Structure for items in a linked list that represents an explicit recursive
|
||||||
call within the pattern; used by pcre2_match(). */
|
call within the pattern; used by pcre2_match(). */
|
||||||
|
|
||||||
typedef struct recursion_info {
|
typedef struct recursion_info {
|
||||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||||
unsigned int group_num; /* Number of group that was called */
|
unsigned int group_num; /* Number of group that was called */
|
||||||
PCRE2_SIZE *ovec_save; /* Pointer to start of saved ovector */
|
PCRE2_SIZE *ovec_save; /* Pointer to the saved ovector itself: a local ovecsave[] array or the saved_ovec member of an ovecsave_frame (not the frame header) */
|
||||||
uint32_t saved_max; /* Number of saved offsets */
|
|
||||||
uint32_t saved_capture_last; /* Last capture number */
|
uint32_t saved_capture_last; /* Last capture number */
|
||||||
PCRE2_SPTR subject_position; /* Position at start of recursion */
|
PCRE2_SPTR subject_position; /* Position at start of recursion */
|
||||||
} recursion_info;
|
} recursion_info;
|
||||||
|
@ -758,6 +766,7 @@ typedef struct match_block {
|
||||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||||
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
||||||
recursion_info *recursive; /* Linked list of recursion data */
|
recursion_info *recursive; /* Linked list of recursion data */
|
||||||
|
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
|
||||||
void *callout_data; /* To pass back to callouts */
|
void *callout_data; /* To pass back to callouts */
|
||||||
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
||||||
#ifdef HEAP_MATCH_RECURSE
|
#ifdef HEAP_MATCH_RECURSE
|
||||||
|
|
|
@ -343,7 +343,7 @@ typedef struct heapframe {
|
||||||
PCRE2_SPTR Xmstart;
|
PCRE2_SPTR Xmstart;
|
||||||
PCRE2_SPTR Xcallpat;
|
PCRE2_SPTR Xcallpat;
|
||||||
PCRE2_SPTR Xdata;
|
PCRE2_SPTR Xdata;
|
||||||
PCRE2_SPTR Xnext;
|
PCRE2_SPTR Xnext_ecode;
|
||||||
PCRE2_SPTR Xpp;
|
PCRE2_SPTR Xpp;
|
||||||
PCRE2_SPTR Xprev;
|
PCRE2_SPTR Xprev;
|
||||||
PCRE2_SPTR Xsaved_eptr;
|
PCRE2_SPTR Xsaved_eptr;
|
||||||
|
@ -447,7 +447,7 @@ PCRE2_SIZE ovecsave[OP_RECURSE_STACK_SAVE_MAX];
|
||||||
/* Save the ovector */
|
/* Save the ovector */
|
||||||
|
|
||||||
new_recursive->ovec_save = ovecsave;
|
new_recursive->ovec_save = ovecsave;
|
||||||
memcpy(ovecsave, mb->ovector, new_recursive->saved_max * sizeof(PCRE2_SIZE));
|
memcpy(ovecsave, mb->ovector, mb->offset_end * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
/* Do the recursion. After processing each alternative, restore the ovector
|
/* Do the recursion. After processing each alternative, restore the ovector
|
||||||
data and the last captured value. */
|
data and the last captured value. */
|
||||||
|
@ -458,7 +458,7 @@ do
|
||||||
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
|
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
|
||||||
mb, eptrb, rdepth + 1);
|
mb, eptrb, rdepth + 1);
|
||||||
memcpy(mb->ovector, new_recursive->ovec_save,
|
memcpy(mb->ovector, new_recursive->ovec_save,
|
||||||
new_recursive->saved_max * sizeof(PCRE2_SIZE));
|
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||||
mb->capture_last = new_recursive->saved_capture_last;
|
mb->capture_last = new_recursive->saved_capture_last;
|
||||||
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc;
|
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc;
|
||||||
|
|
||||||
|
@ -606,7 +606,7 @@ HEAP_RECURSE:
|
||||||
#define callpat frame->Xcallpat
|
#define callpat frame->Xcallpat
|
||||||
#define codelink frame->Xcodelink
|
#define codelink frame->Xcodelink
|
||||||
#define data frame->Xdata
|
#define data frame->Xdata
|
||||||
#define next frame->Xnext
|
#define next_ecode frame->Xnext_ecode
|
||||||
#define pp frame->Xpp
|
#define pp frame->Xpp
|
||||||
#define prev frame->Xprev
|
#define prev frame->Xprev
|
||||||
#define saved_eptr frame->Xsaved_eptr
|
#define saved_eptr frame->Xsaved_eptr
|
||||||
|
@ -654,7 +654,7 @@ PCRE2_SPTR charptr;
|
||||||
#endif
|
#endif
|
||||||
PCRE2_SPTR callpat;
|
PCRE2_SPTR callpat;
|
||||||
PCRE2_SPTR data;
|
PCRE2_SPTR data;
|
||||||
PCRE2_SPTR next;
|
PCRE2_SPTR next_ecode;
|
||||||
PCRE2_SPTR pp;
|
PCRE2_SPTR pp;
|
||||||
PCRE2_SPTR prev;
|
PCRE2_SPTR prev;
|
||||||
PCRE2_SPTR saved_eptr;
|
PCRE2_SPTR saved_eptr;
|
||||||
|
@ -897,9 +897,9 @@ for (;;)
|
||||||
}
|
}
|
||||||
if (rrc == MATCH_THEN)
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
next = ecode + GET(ecode,1);
|
next_ecode = ecode + GET(ecode,1);
|
||||||
if (mb->start_match_ptr < next &&
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1001,9 +1001,9 @@ for (;;)
|
||||||
|
|
||||||
if (rrc == MATCH_THEN)
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
next = ecode + GET(ecode,1);
|
next_ecode = ecode + GET(ecode,1);
|
||||||
if (mb->start_match_ptr < next &&
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1082,9 +1082,9 @@ for (;;)
|
||||||
|
|
||||||
if (rrc == MATCH_THEN)
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
next = ecode + GET(ecode,1);
|
next_ecode = ecode + GET(ecode,1);
|
||||||
if (mb->start_match_ptr < next &&
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1172,9 +1172,9 @@ for (;;)
|
||||||
|
|
||||||
if (rrc == MATCH_THEN)
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
next = ecode + GET(ecode,1);
|
next_ecode = ecode + GET(ecode,1);
|
||||||
if (mb->start_match_ptr < next &&
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1245,9 +1245,9 @@ for (;;)
|
||||||
|
|
||||||
if (rrc == MATCH_THEN)
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
next = ecode + GET(ecode,1);
|
next_ecode = ecode + GET(ecode,1);
|
||||||
if (mb->start_match_ptr < next &&
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1525,9 +1525,9 @@ for (;;)
|
||||||
|
|
||||||
if (rrc == MATCH_THEN)
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
next = ecode + GET(ecode,1);
|
next_ecode = ecode + GET(ecode,1);
|
||||||
if (mb->start_match_ptr < next &&
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1592,9 +1592,9 @@ for (;;)
|
||||||
THEN. */
|
THEN. */
|
||||||
|
|
||||||
case MATCH_THEN:
|
case MATCH_THEN:
|
||||||
next = ecode + GET(ecode,1);
|
next_ecode = ecode + GET(ecode,1);
|
||||||
if (mb->start_match_ptr < next &&
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
(*ecode == OP_ALT || *next == OP_ALT))
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
{
|
{
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
break;
|
break;
|
||||||
|
@ -1711,6 +1711,7 @@ for (;;)
|
||||||
|
|
||||||
case OP_RECURSE:
|
case OP_RECURSE:
|
||||||
{
|
{
|
||||||
|
ovecsave_frame *fr;
|
||||||
recursion_info *ri;
|
recursion_info *ri;
|
||||||
uint32_t recno;
|
uint32_t recno;
|
||||||
|
|
||||||
|
@ -1729,7 +1730,6 @@ for (;;)
|
||||||
|
|
||||||
new_recursive.group_num = recno;
|
new_recursive.group_num = recno;
|
||||||
new_recursive.saved_capture_last = mb->capture_last;
|
new_recursive.saved_capture_last = mb->capture_last;
|
||||||
new_recursive.saved_max = mb->offset_end;
|
|
||||||
new_recursive.subject_position = eptr;
|
new_recursive.subject_position = eptr;
|
||||||
new_recursive.prevrec = mb->recursive;
|
new_recursive.prevrec = mb->recursive;
|
||||||
mb->recursive = &new_recursive;
|
mb->recursive = &new_recursive;
|
||||||
|
@ -1744,7 +1744,7 @@ for (;;)
|
||||||
enough. */
|
enough. */
|
||||||
|
|
||||||
#ifndef HEAP_MATCH_RECURSE
|
#ifndef HEAP_MATCH_RECURSE
|
||||||
if (new_recursive.saved_max <= OP_RECURSE_STACK_SAVE_MAX)
|
if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
|
||||||
{
|
{
|
||||||
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
|
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
|
||||||
eptrb, rdepth);
|
eptrb, rdepth);
|
||||||
|
@ -1761,14 +1761,25 @@ for (;;)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* If the ovector is too big, or if we are using the heap for match()
|
/* If the ovector is too big, or if we are using the heap for match()
|
||||||
recursion, we have to use the heap for saving the ovector. */
|
recursion, we have to use the heap for saving the ovector. Used ovecsave
|
||||||
|
frames are kept on a chain and re-used. This makes a small improvement in
|
||||||
|
execution time on Linux. */
|
||||||
|
|
||||||
|
if (mb->ovecsave_chain != NULL)
|
||||||
|
{
|
||||||
|
new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
|
||||||
|
mb->ovecsave_chain = mb->ovecsave_chain->next;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fr = (ovecsave_frame *)(mb->memctl.malloc(sizeof(ovecsave_frame *) +
|
||||||
|
mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
|
||||||
|
if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
|
||||||
|
new_recursive.ovec_save = fr->saved_ovec;
|
||||||
|
}
|
||||||
|
|
||||||
new_recursive.ovec_save = (PCRE2_SIZE *)
|
|
||||||
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_SIZE),
|
|
||||||
mb->memctl.memory_data));
|
|
||||||
if (new_recursive.ovec_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
|
|
||||||
memcpy(new_recursive.ovec_save, mb->ovector,
|
memcpy(new_recursive.ovec_save, mb->ovector,
|
||||||
new_recursive.saved_max * sizeof(PCRE2_SIZE));
|
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
/* Do the recursion. After processing each alternative, restore the
|
/* Do the recursion. After processing each alternative, restore the
|
||||||
ovector data and the last captured value. This code has the same overall
|
ovector data and the last captured value. This code has the same overall
|
||||||
|
@ -1783,13 +1794,16 @@ for (;;)
|
||||||
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
|
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
|
||||||
mb, eptrb, RM6);
|
mb, eptrb, RM6);
|
||||||
memcpy(mb->ovector, new_recursive.ovec_save,
|
memcpy(mb->ovector, new_recursive.ovec_save,
|
||||||
new_recursive.saved_max * sizeof(PCRE2_SIZE));
|
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||||
mb->capture_last = new_recursive.saved_capture_last;
|
mb->capture_last = new_recursive.saved_capture_last;
|
||||||
mb->recursive = new_recursive.prevrec;
|
mb->recursive = new_recursive.prevrec;
|
||||||
|
|
||||||
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
|
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
|
||||||
{
|
{
|
||||||
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
|
fr = (ovecsave_frame *)
|
||||||
|
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
||||||
|
fr->next = mb->ovecsave_chain;
|
||||||
|
mb->ovecsave_chain = fr;
|
||||||
|
|
||||||
/* Set where we got to in the subject, and reset the start, in case
|
/* Set where we got to in the subject, and reset the start, in case
|
||||||
it was changed by \K. This *is* propagated back out of a recursion,
|
it was changed by \K. This *is* propagated back out of a recursion,
|
||||||
|
@ -1820,7 +1834,10 @@ for (;;)
|
||||||
|
|
||||||
RECURSION_RETURN:
|
RECURSION_RETURN:
|
||||||
mb->recursive = new_recursive.prevrec;
|
mb->recursive = new_recursive.prevrec;
|
||||||
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
|
fr = (ovecsave_frame *)
|
||||||
|
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
||||||
|
fr->next = mb->ovecsave_chain;
|
||||||
|
mb->ovecsave_chain = fr;
|
||||||
RRETURN(rrc);
|
RRETURN(rrc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1841,25 +1858,25 @@ for (;;)
|
||||||
optional ones preceded by BRAZERO or BRAMINZERO. */
|
optional ones preceded by BRAZERO or BRAMINZERO. */
|
||||||
|
|
||||||
case OP_BRAZERO:
|
case OP_BRAZERO:
|
||||||
next = ecode + 1;
|
next_ecode = ecode + 1;
|
||||||
RMATCH(eptr, next, offset_top, mb, eptrb, RM10);
|
RMATCH(eptr, next_ecode, offset_top, mb, eptrb, RM10);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
do next += GET(next, 1); while (*next == OP_ALT);
|
do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
|
||||||
ecode = next + 1 + LINK_SIZE;
|
ecode = next_ecode + 1 + LINK_SIZE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_BRAMINZERO:
|
case OP_BRAMINZERO:
|
||||||
next = ecode + 1;
|
next_ecode = ecode + 1;
|
||||||
do next += GET(next, 1); while (*next == OP_ALT);
|
do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
|
||||||
RMATCH(eptr, next + 1+LINK_SIZE, offset_top, mb, eptrb, RM11);
|
RMATCH(eptr, next_ecode + 1+LINK_SIZE, offset_top, mb, eptrb, RM11);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
ecode++;
|
ecode++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_SKIPZERO:
|
case OP_SKIPZERO:
|
||||||
next = ecode+1;
|
next_ecode = ecode+1;
|
||||||
do next += GET(next,1); while (*next == OP_ALT);
|
do next_ecode += GET(next_ecode,1); while (*next_ecode == OP_ALT);
|
||||||
ecode = next + 1 + LINK_SIZE;
|
ecode = next_ecode + 1 + LINK_SIZE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
|
/* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
|
||||||
|
@ -6173,7 +6190,7 @@ Undefine all the macros that were defined above to handle this. */
|
||||||
#undef callpat
|
#undef callpat
|
||||||
#undef charptr
|
#undef charptr
|
||||||
#undef data
|
#undef data
|
||||||
#undef next
|
#undef next_ecode
|
||||||
#undef pp
|
#undef pp
|
||||||
#undef prev
|
#undef prev
|
||||||
#undef saved_eptr
|
#undef saved_eptr
|
||||||
|
@ -6425,6 +6442,7 @@ mb->poptions = re->overall_options; /* Pattern options */
|
||||||
mb->ignore_skip_arg = 0;
|
mb->ignore_skip_arg = 0;
|
||||||
mb->mark = mb->nomatch_mark = NULL; /* In case never set */
|
mb->mark = mb->nomatch_mark = NULL; /* In case never set */
|
||||||
mb->recursive = NULL; /* No recursion at top level */
|
mb->recursive = NULL; /* No recursion at top level */
|
||||||
|
mb->ovecsave_chain = NULL; /* No ovecsave blocks yet */
|
||||||
mb->hitend = FALSE;
|
mb->hitend = FALSE;
|
||||||
|
|
||||||
/* The name table is needed for finding all the numbers associated with a
|
/* The name table is needed for finding all the numbers associated with a
|
||||||
|
@ -6890,6 +6908,15 @@ ENDLOOP:
|
||||||
release_match_heapframes(&frame_zero, mb);
|
release_match_heapframes(&frame_zero, mb);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Release any frames that were saved from recursions. */
|
||||||
|
|
||||||
|
while (mb->ovecsave_chain != NULL)
|
||||||
|
{
|
||||||
|
ovecsave_frame *this = mb->ovecsave_chain;
|
||||||
|
mb->ovecsave_chain = this->next;
|
||||||
|
mb->memctl.free(this, mb->memctl.memory_data);
|
||||||
|
}
|
||||||
|
|
||||||
/* Fill in fields that are always returned in the match data. */
|
/* Fill in fields that are always returned in the match data. */
|
||||||
|
|
||||||
match_data->code = re;
|
match_data->code = re;
|
||||||
|
|
Loading…
Reference in New Issue