Tidy up callout code in pcre2_dfa_match().

This commit is contained in:
Philip.Hazel 2017-12-26 16:43:45 +00:00
parent 911d43cd1e
commit f36a3235bc
2 changed files with 89 additions and 86 deletions

View File

@ -293,6 +293,66 @@ typedef struct stateblock {
/*************************************************
* Process a callout *
*************************************************/
/* Run one callout on behalf of the DFA matcher. The length of the callout item
is always handed back through lengthptr, even when no callout function has been
supplied, so that the caller can step over the item in the compiled code.

Arguments:
  code             current code pointer
  offsets          points to current capture offsets
  current_subject  start of current subject match
  ptr              current position in subject
  mb               the match block
  extracode        extra code offset when called from condition
  lengthptr        where to return the callout length

Returns:           0 if no callout function is set; otherwise the value
                   returned by the callout function
*/

static int
do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
  PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
  PCRE2_SIZE *lengthptr)
{
PCRE2_SPTR item = code + extracode;          /* The callout item itself */
int numbered = (*item == OP_CALLOUT);        /* Otherwise OP_CALLOUT_STR */
PCRE2_SIZE item_length;
pcre2_callout_block *block;

/* A numerical callout has a fixed length; a string callout carries its own
length within the item. */

if (numbered)
  item_length = (PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT];
else
  item_length = (PCRE2_SIZE)GET(item, 1 + 2*LINK_SIZE);

*lengthptr = item_length;

/* Nothing more to do when no callout function has been provided. */

if (mb->callout == NULL) return 0;

/* Only the fields that vary from one callout to the next are filled in here;
the fixed fields of the block were set up at the start of matching. */

block = mb->cb;
block->offset_vector = offsets;
block->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject);
block->current_position = (PCRE2_SIZE)(ptr - mb->start_subject);
block->pattern_position = GET(item, 1);
block->next_item_length = GET(item, 1 + LINK_SIZE);

if (!numbered)
  {
  /* String callout: no number, but a string with its offset and length. */

  block->callout_number = 0;
  block->callout_string_offset = GET(item, 1 + 3*LINK_SIZE);
  block->callout_string = item + (1 + 4*LINK_SIZE) + 1;
  block->callout_string_length = item_length - (1 + 4*LINK_SIZE) - 2;
  }
else
  {
  /* Numerical callout: a number, and no string data. */

  block->callout_number = item[1 + 2*LINK_SIZE];
  block->callout_string_offset = 0;
  block->callout_string = NULL;
  block->callout_string_length = 0;
  }

return (mb->callout)(block, mb->callout_data);
}
/************************************************* /*************************************************
* Match a Regular Expression - DFA engine * * Match a Regular Expression - DFA engine *
*************************************************/ *************************************************/
@ -2566,46 +2626,10 @@ for (;;)
if (code[LINK_SIZE + 1] == OP_CALLOUT if (code[LINK_SIZE + 1] == OP_CALLOUT
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR) || code[LINK_SIZE + 1] == OP_CALLOUT_STR)
{ {
PCRE2_SIZE callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT)? PCRE2_SIZE callout_length;
(PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] : rrc = do_callout(code, offsets, current_subject, ptr, mb,
(PCRE2_SIZE)GET(code, 2 + 3*LINK_SIZE); 1 + LINK_SIZE, &callout_length);
if (rrc < 0) return rrc; /* Abandon */
rrc = 0;
if (mb->callout != NULL)
{
pcre2_callout_block cb;
cb.version = 2;
cb.callout_flags = 0;
cb.capture_top = 1;
cb.capture_last = 0;
cb.offset_vector = offsets;
cb.mark = NULL; /* No (*MARK) support */
cb.subject = start_subject;
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
cb.pattern_position = GET(code, LINK_SIZE + 2);
cb.next_item_length = GET(code, LINK_SIZE + 2 + LINK_SIZE);
if (code[LINK_SIZE + 1] == OP_CALLOUT)
{
cb.callout_number = code[2 + 3*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE);
cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
cb.callout_string_length =
callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
return rrc; /* Abandon */
}
if (rrc > 0) break; /* Fail this thread */ if (rrc > 0) break; /* Fail this thread */
code += callout_length; /* Skip callout data */ code += callout_length; /* Skip callout data */
} }
@ -2937,45 +2961,10 @@ for (;;)
case OP_CALLOUT: case OP_CALLOUT:
case OP_CALLOUT_STR: case OP_CALLOUT_STR:
{ {
unsigned int callout_length = (*code == OP_CALLOUT) PCRE2_SIZE callout_length;
? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 1 + 2*LINK_SIZE); rrc = do_callout(code, offsets, current_subject, ptr, mb, 0,
rrc = 0; &callout_length);
if (rrc < 0) return rrc; /* Abandon */
if (mb->callout != NULL)
{
pcre2_callout_block cb;
cb.version = 2;
cb.callout_flags = 0;
cb.capture_top = 1;
cb.capture_last = 0;
cb.offset_vector = offsets;
cb.mark = NULL; /* No (*MARK) support */
cb.subject = start_subject;
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
cb.pattern_position = GET(code, 1);
cb.next_item_length = GET(code, 1 + LINK_SIZE);
if (*code == OP_CALLOUT)
{
cb.callout_number = code[1 + 2*LINK_SIZE];
cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE);
cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length =
callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
return rrc; /* Abandon */
}
if (rrc == 0) if (rrc == 0)
{ ADD_ACTIVE(state_offset + (int)callout_length, 0); } { ADD_ACTIVE(state_offset + (int)callout_length, 0); }
} }
@ -3094,6 +3083,7 @@ const uint8_t *start_bits = NULL;
/* We need to have mb pointing to a match block, because the IS_NEWLINE macro /* We need to have mb pointing to a match block, because the IS_NEWLINE macro
is used below, and it expects NLBLOCK to be defined as a pointer. */ is used below, and it expects NLBLOCK to be defined as a pointer. */
pcre2_callout_block cb;
dfa_match_block actual_match_block; dfa_match_block actual_match_block;
dfa_match_block *mb = &actual_match_block; dfa_match_block *mb = &actual_match_block;
@ -3171,9 +3161,21 @@ startline = (re->flags & PCRE2_STARTLINE) != 0;
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
bumpalong_limit = end_subject; bumpalong_limit = end_subject;
/* Get data from the match context, if present, and fill in the fields in the /* Initialize and set up the fixed fields in the callout block, with a pointer
match block. It is an error to set an offset limit without setting the flag at in the match block. */
compile time. */
mb->cb = &cb;
cb.version = 2;
cb.subject = subject;
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
cb.callout_flags = 0;
cb.capture_top = 1; /* No capture support */
cb.capture_last = 0;
cb.mark = NULL; /* No (*MARK) support */
/* Get data from the match context, if present, and fill in the remaining
fields in the match block. It is an error to set an offset limit without
setting the flag at compile time. */
if (mcontext == NULL) if (mcontext == NULL)
{ {

View File

@ -888,6 +888,7 @@ typedef struct dfa_match_block {
uint32_t nllen; /* Newline string length */ uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */ PCRE2_UCHAR nl[4]; /* Newline string when fixed */
uint16_t bsr_convention; /* \R interpretation */ uint16_t bsr_convention; /* \R interpretation */
pcre2_callout_block *cb; /* Points to a callout block */
void *callout_data; /* To pass back to callouts */ void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
dfa_recursion_info *recursive; /* Linked list of recursion data */ dfa_recursion_info *recursive; /* Linked list of recursion data */