diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 9cdbebb..b0cd866 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -293,6 +293,66 @@ typedef struct stateblock { +/************************************************* +* Process a callout * +*************************************************/ + +/* This function is called to perform a callout. + +Arguments: + code current code pointer + offsets points to current capture offsets + current_subject start of current subject match + ptr current position in subject + mb the match block + extracode extra code offset when called from condition + lengthptr where to return the callout length + +Returns: the return from the callout +*/ + +static int +do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject, + PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode, + PCRE2_SIZE *lengthptr) +{ +pcre2_callout_block *cb = mb->cb; + +*lengthptr = (code[extracode] == OP_CALLOUT)? + (PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] : + (PCRE2_SIZE)GET(code, 1 + 2*LINK_SIZE + extracode); + +if (mb->callout == NULL) return 0; /* No callout provided */ + +/* Fixed fields in the callout block are set once and for all at the start of +matching. */ + +cb->offset_vector = offsets; +cb->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject); +cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject); +cb->pattern_position = GET(code, 1 + extracode); +cb->next_item_length = GET(code, 1 + LINK_SIZE + extracode); + +if (code[extracode] == OP_CALLOUT) + { + cb->callout_number = code[1 + 2*LINK_SIZE + extracode]; + cb->callout_string_offset = 0; + cb->callout_string = NULL; + cb->callout_string_length = 0; + } +else + { + cb->callout_number = 0; + cb->callout_string_offset = GET(code, 1 + 3*LINK_SIZE + extracode); + cb->callout_string = code + (1 + 4*LINK_SIZE + extracode) + 1; + cb->callout_string_length = *lengthptr - (1 + 4*LINK_SIZE) - 2; + } + +return (mb->callout)(cb, mb->callout_data); +} + + + /************************************************* * Match a Regular Expression - DFA engine * *************************************************/ @@ -1370,7 +1430,7 @@ for (;;) active_count--; /* Remove non-match possibility */ next_active_state--; } - (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, + (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, &ncount); count++; ADD_NEW_DATA(-state_offset, count, ncount); @@ -1621,7 +1681,7 @@ for (;;) active_count--; /* Remove non-match possibility */ next_active_state--; } - (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, + (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, &ncount); ADD_NEW_DATA(-(state_offset + count), 0, ncount); } @@ -1882,7 +1942,7 @@ for (;;) active_count--; /* Remove non-match possibility */ next_active_state--; } - nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, + nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, &ncount); if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0) reset_could_continue = TRUE; @@ -2061,7 +2121,7 @@ for (;;) if (clen > 0) { int ncount = 0; - PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, + PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, &ncount); if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0) reset_could_continue = TRUE; @@ -2566,46 +2626,10 @@ for (;;) if (code[LINK_SIZE + 1] == OP_CALLOUT || code[LINK_SIZE + 1] == OP_CALLOUT_STR) { - PCRE2_SIZE callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT)? - (PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] : - (PCRE2_SIZE)GET(code, 2 + 3*LINK_SIZE); - - rrc = 0; - if (mb->callout != NULL) - { - pcre2_callout_block cb; - cb.version = 2; - cb.callout_flags = 0; - cb.capture_top = 1; - cb.capture_last = 0; - cb.offset_vector = offsets; - cb.mark = NULL; /* No (*MARK) support */ - cb.subject = start_subject; - cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject); - cb.start_match = (PCRE2_SIZE)(current_subject - start_subject); - cb.current_position = (PCRE2_SIZE)(ptr - start_subject); - cb.pattern_position = GET(code, LINK_SIZE + 2); - cb.next_item_length = GET(code, LINK_SIZE + 2 + LINK_SIZE); - - if (code[LINK_SIZE + 1] == OP_CALLOUT) - { - cb.callout_number = code[2 + 3*LINK_SIZE]; - cb.callout_string_offset = 0; - cb.callout_string = NULL; - cb.callout_string_length = 0; - } - else - { - cb.callout_number = 0; - cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE); - cb.callout_string = code + (2 + 5*LINK_SIZE) + 1; - cb.callout_string_length = - callout_length - (1 + 4*LINK_SIZE) - 2; - } - - if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0) - return rrc; /* Abandon */ - } + PCRE2_SIZE callout_length; + rrc = do_callout(code, offsets, current_subject, ptr, mb, + 1 + LINK_SIZE, &callout_length); + if (rrc < 0) return rrc; /* Abandon */ if (rrc > 0) break; /* Fail this thread */ code += callout_length; /* Skip callout data */ } @@ -2937,45 +2961,10 @@ for (;;) case OP_CALLOUT: case OP_CALLOUT_STR: { - unsigned int callout_length = (*code == OP_CALLOUT) - ? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 1 + 2*LINK_SIZE); - rrc = 0; - - if (mb->callout != NULL) - { - pcre2_callout_block cb; - cb.version = 2; - cb.callout_flags = 0; - cb.capture_top = 1; - cb.capture_last = 0; - cb.offset_vector = offsets; - cb.mark = NULL; /* No (*MARK) support */ - cb.subject = start_subject; - cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject); - cb.start_match = (PCRE2_SIZE)(current_subject - start_subject); - cb.current_position = (PCRE2_SIZE)(ptr - start_subject); - cb.pattern_position = GET(code, 1); - cb.next_item_length = GET(code, 1 + LINK_SIZE); - - if (*code == OP_CALLOUT) - { - cb.callout_number = code[1 + 2*LINK_SIZE]; - cb.callout_string_offset = 0; - cb.callout_string = NULL; - cb.callout_string_length = 0; - } - else - { - cb.callout_number = 0; - cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE); - cb.callout_string = code + (1 + 4*LINK_SIZE) + 1; - cb.callout_string_length = - callout_length - (1 + 4*LINK_SIZE) - 2; - } - - if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0) - return rrc; /* Abandon */ - } + PCRE2_SIZE callout_length; + rrc = do_callout(code, offsets, current_subject, ptr, mb, 0, + &callout_length); + if (rrc < 0) return rrc; /* Abandon */ if (rrc == 0) { ADD_ACTIVE(state_offset + (int)callout_length, 0); } } @@ -3094,6 +3083,7 @@ const uint8_t *start_bits = NULL; /* We need to have mb pointing to a match block, because the IS_NEWLINE macro is used below, and it expects NLBLOCK to be defined as a pointer. */ +pcre2_callout_block cb; dfa_match_block actual_match_block; dfa_match_block *mb = &actual_match_block; @@ -3171,9 +3161,21 @@ startline = (re->flags & PCRE2_STARTLINE) != 0; firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; bumpalong_limit = end_subject; -/* Get data from the match context, if present, and fill in the fields in the -match block. It is an error to set an offset limit without setting the flag at -compile time. */ +/* Initialize and set up the fixed fields in the callout block, with a pointer +in the match block. */ + +mb->cb = &cb; +cb.version = 2; +cb.subject = subject; +cb.subject_length = (PCRE2_SIZE)(end_subject - subject); +cb.callout_flags = 0; +cb.capture_top = 1; /* No capture support */ +cb.capture_last = 0; +cb.mark = NULL; /* No (*MARK) support */ + +/* Get data from the match context, if present, and fill in the remaining +fields in the match block. It is an error to set an offset limit without +setting the flag at compile time. */ if (mcontext == NULL) { diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index ed97a5b..7305280 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -888,6 +888,7 @@ typedef struct dfa_match_block { uint32_t nllen; /* Newline string length */ PCRE2_UCHAR nl[4]; /* Newline string when fixed */ uint16_t bsr_convention; /* \R interpretation */ + pcre2_callout_block *cb; /* Points to a callout block */ void *callout_data; /* To pass back to callouts */ int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */ dfa_recursion_info *recursive; /* Linked list of recursion data */