Tidy up callout code in pcre2_dfa_match().
This commit is contained in:
parent
911d43cd1e
commit
f36a3235bc
|
@ -293,6 +293,66 @@ typedef struct stateblock {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Process a callout *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called to perform a callout.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code current code pointer
|
||||||
|
offsets points to current capture offsets
|
||||||
|
current_subject start of current subject match
|
||||||
|
ptr current position in subject
|
||||||
|
mb the match block
|
||||||
|
extracode extra code offset when called from condition
|
||||||
|
lengthptr where to return the callout length
|
||||||
|
|
||||||
|
Returns: the return from the callout
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
|
||||||
|
PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
|
||||||
|
PCRE2_SIZE *lengthptr)
|
||||||
|
{
|
||||||
|
pcre2_callout_block *cb = mb->cb;
|
||||||
|
|
||||||
|
*lengthptr = (code[extracode] == OP_CALLOUT)?
|
||||||
|
(PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
|
||||||
|
(PCRE2_SIZE)GET(code, 1 + 2*LINK_SIZE + extracode);
|
||||||
|
|
||||||
|
if (mb->callout == NULL) return 0; /* No callout provided */
|
||||||
|
|
||||||
|
/* Fixed fields in the callout block are set once and for all at the start of
|
||||||
|
matching. */
|
||||||
|
|
||||||
|
cb->offset_vector = offsets;
|
||||||
|
cb->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject);
|
||||||
|
cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject);
|
||||||
|
cb->pattern_position = GET(code, 1 + extracode);
|
||||||
|
cb->next_item_length = GET(code, 1 + LINK_SIZE + extracode);
|
||||||
|
|
||||||
|
if (code[extracode] == OP_CALLOUT)
|
||||||
|
{
|
||||||
|
cb->callout_number = code[1 + 2*LINK_SIZE + extracode];
|
||||||
|
cb->callout_string_offset = 0;
|
||||||
|
cb->callout_string = NULL;
|
||||||
|
cb->callout_string_length = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cb->callout_number = 0;
|
||||||
|
cb->callout_string_offset = GET(code, 1 + 3*LINK_SIZE + extracode);
|
||||||
|
cb->callout_string = code + (1 + 4*LINK_SIZE + extracode) + 1;
|
||||||
|
cb->callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (mb->callout)(cb, mb->callout_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Match a Regular Expression - DFA engine *
|
* Match a Regular Expression - DFA engine *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -1370,7 +1430,7 @@ for (;;)
|
||||||
active_count--; /* Remove non-match possibility */
|
active_count--; /* Remove non-match possibility */
|
||||||
next_active_state--;
|
next_active_state--;
|
||||||
}
|
}
|
||||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
&ncount);
|
&ncount);
|
||||||
count++;
|
count++;
|
||||||
ADD_NEW_DATA(-state_offset, count, ncount);
|
ADD_NEW_DATA(-state_offset, count, ncount);
|
||||||
|
@ -1621,7 +1681,7 @@ for (;;)
|
||||||
active_count--; /* Remove non-match possibility */
|
active_count--; /* Remove non-match possibility */
|
||||||
next_active_state--;
|
next_active_state--;
|
||||||
}
|
}
|
||||||
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
&ncount);
|
&ncount);
|
||||||
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
|
||||||
}
|
}
|
||||||
|
@ -1882,7 +1942,7 @@ for (;;)
|
||||||
active_count--; /* Remove non-match possibility */
|
active_count--; /* Remove non-match possibility */
|
||||||
next_active_state--;
|
next_active_state--;
|
||||||
}
|
}
|
||||||
nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
|
||||||
&ncount);
|
&ncount);
|
||||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
reset_could_continue = TRUE;
|
reset_could_continue = TRUE;
|
||||||
|
@ -2061,7 +2121,7 @@ for (;;)
|
||||||
if (clen > 0)
|
if (clen > 0)
|
||||||
{
|
{
|
||||||
int ncount = 0;
|
int ncount = 0;
|
||||||
PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
|
PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
|
||||||
end_subject, utf, &ncount);
|
end_subject, utf, &ncount);
|
||||||
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||||
reset_could_continue = TRUE;
|
reset_could_continue = TRUE;
|
||||||
|
@ -2566,46 +2626,10 @@ for (;;)
|
||||||
if (code[LINK_SIZE + 1] == OP_CALLOUT
|
if (code[LINK_SIZE + 1] == OP_CALLOUT
|
||||||
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
|
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT)?
|
PCRE2_SIZE callout_length;
|
||||||
(PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
|
rrc = do_callout(code, offsets, current_subject, ptr, mb,
|
||||||
(PCRE2_SIZE)GET(code, 2 + 3*LINK_SIZE);
|
1 + LINK_SIZE, &callout_length);
|
||||||
|
if (rrc < 0) return rrc; /* Abandon */
|
||||||
rrc = 0;
|
|
||||||
if (mb->callout != NULL)
|
|
||||||
{
|
|
||||||
pcre2_callout_block cb;
|
|
||||||
cb.version = 2;
|
|
||||||
cb.callout_flags = 0;
|
|
||||||
cb.capture_top = 1;
|
|
||||||
cb.capture_last = 0;
|
|
||||||
cb.offset_vector = offsets;
|
|
||||||
cb.mark = NULL; /* No (*MARK) support */
|
|
||||||
cb.subject = start_subject;
|
|
||||||
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
|
|
||||||
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
|
|
||||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
|
||||||
cb.pattern_position = GET(code, LINK_SIZE + 2);
|
|
||||||
cb.next_item_length = GET(code, LINK_SIZE + 2 + LINK_SIZE);
|
|
||||||
|
|
||||||
if (code[LINK_SIZE + 1] == OP_CALLOUT)
|
|
||||||
{
|
|
||||||
cb.callout_number = code[2 + 3*LINK_SIZE];
|
|
||||||
cb.callout_string_offset = 0;
|
|
||||||
cb.callout_string = NULL;
|
|
||||||
cb.callout_string_length = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cb.callout_number = 0;
|
|
||||||
cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE);
|
|
||||||
cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
|
|
||||||
cb.callout_string_length =
|
|
||||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
|
||||||
return rrc; /* Abandon */
|
|
||||||
}
|
|
||||||
if (rrc > 0) break; /* Fail this thread */
|
if (rrc > 0) break; /* Fail this thread */
|
||||||
code += callout_length; /* Skip callout data */
|
code += callout_length; /* Skip callout data */
|
||||||
}
|
}
|
||||||
|
@ -2937,45 +2961,10 @@ for (;;)
|
||||||
case OP_CALLOUT:
|
case OP_CALLOUT:
|
||||||
case OP_CALLOUT_STR:
|
case OP_CALLOUT_STR:
|
||||||
{
|
{
|
||||||
unsigned int callout_length = (*code == OP_CALLOUT)
|
PCRE2_SIZE callout_length;
|
||||||
? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 1 + 2*LINK_SIZE);
|
rrc = do_callout(code, offsets, current_subject, ptr, mb, 0,
|
||||||
rrc = 0;
|
&callout_length);
|
||||||
|
if (rrc < 0) return rrc; /* Abandon */
|
||||||
if (mb->callout != NULL)
|
|
||||||
{
|
|
||||||
pcre2_callout_block cb;
|
|
||||||
cb.version = 2;
|
|
||||||
cb.callout_flags = 0;
|
|
||||||
cb.capture_top = 1;
|
|
||||||
cb.capture_last = 0;
|
|
||||||
cb.offset_vector = offsets;
|
|
||||||
cb.mark = NULL; /* No (*MARK) support */
|
|
||||||
cb.subject = start_subject;
|
|
||||||
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
|
|
||||||
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
|
|
||||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
|
||||||
cb.pattern_position = GET(code, 1);
|
|
||||||
cb.next_item_length = GET(code, 1 + LINK_SIZE);
|
|
||||||
|
|
||||||
if (*code == OP_CALLOUT)
|
|
||||||
{
|
|
||||||
cb.callout_number = code[1 + 2*LINK_SIZE];
|
|
||||||
cb.callout_string_offset = 0;
|
|
||||||
cb.callout_string = NULL;
|
|
||||||
cb.callout_string_length = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cb.callout_number = 0;
|
|
||||||
cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE);
|
|
||||||
cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
|
|
||||||
cb.callout_string_length =
|
|
||||||
callout_length - (1 + 4*LINK_SIZE) - 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
|
||||||
return rrc; /* Abandon */
|
|
||||||
}
|
|
||||||
if (rrc == 0)
|
if (rrc == 0)
|
||||||
{ ADD_ACTIVE(state_offset + (int)callout_length, 0); }
|
{ ADD_ACTIVE(state_offset + (int)callout_length, 0); }
|
||||||
}
|
}
|
||||||
|
@ -3094,6 +3083,7 @@ const uint8_t *start_bits = NULL;
|
||||||
/* We need to have mb pointing to a match block, because the IS_NEWLINE macro
|
/* We need to have mb pointing to a match block, because the IS_NEWLINE macro
|
||||||
is used below, and it expects NLBLOCK to be defined as a pointer. */
|
is used below, and it expects NLBLOCK to be defined as a pointer. */
|
||||||
|
|
||||||
|
pcre2_callout_block cb;
|
||||||
dfa_match_block actual_match_block;
|
dfa_match_block actual_match_block;
|
||||||
dfa_match_block *mb = &actual_match_block;
|
dfa_match_block *mb = &actual_match_block;
|
||||||
|
|
||||||
|
@ -3171,9 +3161,21 @@ startline = (re->flags & PCRE2_STARTLINE) != 0;
|
||||||
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
||||||
bumpalong_limit = end_subject;
|
bumpalong_limit = end_subject;
|
||||||
|
|
||||||
/* Get data from the match context, if present, and fill in the fields in the
|
/* Initialize and set up the fixed fields in the callout block, with a pointer
|
||||||
match block. It is an error to set an offset limit without setting the flag at
|
in the match block. */
|
||||||
compile time. */
|
|
||||||
|
mb->cb = &cb;
|
||||||
|
cb.version = 2;
|
||||||
|
cb.subject = subject;
|
||||||
|
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
|
||||||
|
cb.callout_flags = 0;
|
||||||
|
cb.capture_top = 1; /* No capture support */
|
||||||
|
cb.capture_last = 0;
|
||||||
|
cb.mark = NULL; /* No (*MARK) support */
|
||||||
|
|
||||||
|
/* Get data from the match context, if present, and fill in the remaining
|
||||||
|
fields in the match block. It is an error to set an offset limit without
|
||||||
|
setting the flag at compile time. */
|
||||||
|
|
||||||
if (mcontext == NULL)
|
if (mcontext == NULL)
|
||||||
{
|
{
|
||||||
|
|
|
@ -888,6 +888,7 @@ typedef struct dfa_match_block {
|
||||||
uint32_t nllen; /* Newline string length */
|
uint32_t nllen; /* Newline string length */
|
||||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||||
uint16_t bsr_convention; /* \R interpretation */
|
uint16_t bsr_convention; /* \R interpretation */
|
||||||
|
pcre2_callout_block *cb; /* Points to a callout block */
|
||||||
void *callout_data; /* To pass back to callouts */
|
void *callout_data; /* To pass back to callouts */
|
||||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||||
dfa_recursion_info *recursive; /* Linked list of recursion data */
|
dfa_recursion_info *recursive; /* Linked list of recursion data */
|
||||||
|
|
Loading…
Reference in New Issue