Code for callouts with string arguments. Documentation not yet updated.
This commit is contained in:
parent
24189152fe
commit
d0cf279d87
|
@ -1,6 +1,12 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.20 xx-xx-2015
|
||||
------------------------
|
||||
|
||||
1. Callouts with string arguments have been added.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
||||
|
|
|
@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
|||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [10])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2015-03-06])
|
||||
m4_define(pcre2_minor, [20])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2015-03-11])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -337,6 +337,9 @@ typedef struct pcre2_callout_block { \
|
|||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
uint32_t callout_string_length; /* Length of string compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block;
|
||||
|
||||
|
|
|
@ -604,6 +604,12 @@ for(;;)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (c == OP_CALLOUT_STR)
|
||||
{
|
||||
code += GET(code, 1 + 2*LINK_SIZE);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == OP_ALT)
|
||||
{
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
|
@ -1234,6 +1240,10 @@ for (;;)
|
|||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
code += GET(code, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
code += GET(code, 1);
|
||||
|
|
|
@ -573,7 +573,8 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
|
||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80 };
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82 };
|
||||
|
||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||
|
@ -617,7 +618,6 @@ static pso pso_list[] = {
|
|||
{ (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE }
|
||||
};
|
||||
|
||||
|
||||
/* This table is used when converting repeating opcodes into possessified
|
||||
versions as a result of an explicit possessive quantifier such as ++. A zero
|
||||
value means there is no possessified version - in those cases the item in
|
||||
|
@ -730,11 +730,11 @@ Returns: new code pointer
|
|||
static PCRE2_UCHAR *
|
||||
auto_callout(PCRE2_UCHAR *code, PCRE2_SPTR ptr, compile_block *cb)
|
||||
{
|
||||
*code++ = OP_CALLOUT;
|
||||
*code++ = 255;
|
||||
PUT(code, 0, ptr - cb->start_pattern); /* Pattern offset */
|
||||
PUT(code, LINK_SIZE, 0); /* Default length */
|
||||
return code + 2 * LINK_SIZE;
|
||||
code[0] = OP_CALLOUT;
|
||||
PUT(code, 1, ptr - cb->start_pattern); /* Pattern offset */
|
||||
PUT(code, 1 + LINK_SIZE, 0); /* Default length */
|
||||
code[1 + 2*LINK_SIZE] = 255;
|
||||
return code + PRIV(OP_lengths)[OP_CALLOUT];
|
||||
}
|
||||
|
||||
|
||||
|
@ -759,8 +759,8 @@ static void
|
|||
complete_callout(PCRE2_UCHAR *previous_callout, PCRE2_SPTR ptr,
|
||||
compile_block *cb)
|
||||
{
|
||||
size_t length = ptr - cb->start_pattern - GET(previous_callout, 2);
|
||||
PUT(previous_callout, 2 + LINK_SIZE, length);
|
||||
size_t length = ptr - cb->start_pattern - GET(previous_callout, 1);
|
||||
PUT(previous_callout, 1 + LINK_SIZE, length);
|
||||
}
|
||||
|
||||
|
||||
|
@ -909,6 +909,10 @@ for (;;)
|
|||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
/* Handle literal characters */
|
||||
|
||||
case OP_CHAR:
|
||||
|
@ -1157,6 +1161,10 @@ for (;;)
|
|||
code += PRIV(OP_lengths)[*code];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
code += GET(code, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
return code;
|
||||
}
|
||||
|
@ -2279,11 +2287,13 @@ for (;;)
|
|||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit
|
||||
map. This includes negated single high-valued characters. The length in
|
||||
the table is zero; the actual length is stored in the compiled code. */
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. CALLOUT_STR is used for
|
||||
callouts with string arguments. In both cases the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle recursion */
|
||||
|
||||
|
@ -2442,11 +2452,13 @@ for (;;)
|
|||
if (c == OP_END) return NULL;
|
||||
if (c == OP_RECURSE) return code;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit
|
||||
map. This includes negated single high-valued characters. The length in
|
||||
the table is zero; the actual length is stored in the compiled code. */
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. CALLOUT_STR is used for
|
||||
callouts with string arguments. In both cases the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
|
@ -5558,30 +5570,124 @@ for (;; ptr++)
|
|||
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case CHAR_C: /* Callout - may be followed by digits; */
|
||||
case CHAR_C: /* Callout */
|
||||
previous_callout = code; /* Save for later completion */
|
||||
after_manual_callout = 1; /* Skip one item before completing */
|
||||
*code++ = OP_CALLOUT;
|
||||
ptr++; /* Character after (?C */
|
||||
|
||||
/* A callout may have a string argument, delimited by one of a fixed
|
||||
number of characters, or an undelimited numerical argument, or no
|
||||
argument, which is the same as (?C0). Different opcodes are used for
|
||||
string and numerical callouts. */
|
||||
|
||||
if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr))
|
||||
{
|
||||
uint32_t delimiter = 0;
|
||||
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
{
|
||||
if (*ptr == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
delimiter = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (delimiter == 0)
|
||||
{
|
||||
*errorcodeptr = ERR82;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* During the pre-compile phase, we parse the string and update the
|
||||
length. There is no need to generate any code. */
|
||||
|
||||
if (lengthptr != NULL) /* Only check the string */
|
||||
{
|
||||
PCRE2_SPTR start = ptr;
|
||||
do
|
||||
{
|
||||
if (++ptr >= cb->end_pattern)
|
||||
{
|
||||
*errorcodeptr = ERR81;
|
||||
ptr = start; /* To give a more useful message */
|
||||
goto FAILED;
|
||||
}
|
||||
if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
|
||||
}
|
||||
while (ptr[0] != delimiter);
|
||||
|
||||
/* Start points to the opening delimiter, ptr points to the
|
||||
closing delimiter. We must allow for including the delimiter and
|
||||
for the terminating zero. Any doubled delimiters within the string
|
||||
make this an overestimate, but it is not worth bothering about. */
|
||||
|
||||
(*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE);
|
||||
}
|
||||
|
||||
/* In the real compile we can copy the string, knowing that it is
|
||||
syntactically OK. The starting delimiter is included so that the
|
||||
client can discover it if they want. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE);
|
||||
*callout_string++ = *ptr++;
|
||||
for(;;)
|
||||
{
|
||||
if (*ptr == delimiter)
|
||||
{
|
||||
if (ptr[1] == delimiter) ptr++; else break;
|
||||
}
|
||||
*callout_string++ = *ptr++;
|
||||
}
|
||||
*callout_string++ = CHAR_NULL;
|
||||
code[0] = OP_CALLOUT_STR;
|
||||
PUT(code, 1, (int)(ptr + 2 - cb->start_pattern)); /* Next offset */
|
||||
PUT(code, 1 + LINK_SIZE, 0); /* Default length */
|
||||
PUT(code, 1 + 2*LINK_SIZE, /* Compute size */
|
||||
(int)(callout_string - code));
|
||||
code = callout_string;
|
||||
}
|
||||
|
||||
/* Advance to what should be the closing parenthesis, which is
|
||||
checked below. */
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Handle a callout with an optional numerical argument, which must be
|
||||
less than or equal to 255. A missing argument gives 0. */
|
||||
|
||||
else
|
||||
{
|
||||
int n = 0;
|
||||
ptr++;
|
||||
while(IS_DIGIT(*ptr))
|
||||
code[0] = OP_CALLOUT; /* Numerical callout */
|
||||
while (IS_DIGIT(*ptr))
|
||||
{
|
||||
n = n * 10 + *ptr++ - CHAR_0;
|
||||
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
*errorcodeptr = ERR39;
|
||||
goto FAILED;
|
||||
if (n > 255)
|
||||
{
|
||||
*errorcodeptr = ERR38;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
if (n > 255)
|
||||
{
|
||||
*errorcodeptr = ERR38;
|
||||
goto FAILED;
|
||||
}
|
||||
*code++ = n;
|
||||
PUT(code, 0, (int)(ptr - cb->start_pattern + 1)); /* Pattern offset */
|
||||
PUT(code, LINK_SIZE, 0); /* Default length */
|
||||
code += 2 * LINK_SIZE;
|
||||
PUT(code, 1, (int)(ptr - cb->start_pattern + 1)); /* Next offset */
|
||||
PUT(code, 1 + LINK_SIZE, 0); /* Default length */
|
||||
code[1 + 2*LINK_SIZE] = n; /* Callout number */
|
||||
code += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
}
|
||||
|
||||
/* Both formats must have a closing parenthesis */
|
||||
|
||||
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
*errorcodeptr = ERR39;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* Callouts cannot be quantified. */
|
||||
|
||||
previous = NULL;
|
||||
continue;
|
||||
|
||||
|
@ -7164,7 +7270,10 @@ do {
|
|||
if (op == OP_COND)
|
||||
{
|
||||
scode += 1 + LINK_SIZE;
|
||||
|
||||
if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
|
||||
|
||||
switch (*scode)
|
||||
{
|
||||
case OP_CREF:
|
||||
|
|
|
@ -161,6 +161,7 @@ static const uint8_t coptable[] = {
|
|||
0, /* DNREFI */
|
||||
0, /* RECURSE */
|
||||
0, /* CALLOUT */
|
||||
0, /* CALLOUT_STR */
|
||||
0, /* Alt */
|
||||
0, /* Ket */
|
||||
0, /* KetRmax */
|
||||
|
@ -233,6 +234,7 @@ static const uint8_t poptable[] = {
|
|||
0, /* DNREFI */
|
||||
0, /* RECURSE */
|
||||
0, /* CALLOUT */
|
||||
0, /* CALLOUT_STR */
|
||||
0, /* Alt */
|
||||
0, /* Ket */
|
||||
0, /* KetRmax */
|
||||
|
@ -2605,14 +2607,16 @@ for (;;)
|
|||
is inserted between OP_COND and an assertion condition. This does not
|
||||
happen for the other conditions. */
|
||||
|
||||
if (code[LINK_SIZE+1] == OP_CALLOUT)
|
||||
if (code[LINK_SIZE + 1] == OP_CALLOUT
|
||||
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
|
||||
{
|
||||
unsigned int callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT)
|
||||
? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 2 + 3*LINK_SIZE);
|
||||
rrc = 0;
|
||||
if (mb->callout != NULL)
|
||||
{
|
||||
pcre2_callout_block cb;
|
||||
cb.version = 0;
|
||||
cb.callout_number = code[LINK_SIZE+2];
|
||||
cb.version = 1;
|
||||
cb.capture_top = 1;
|
||||
cb.capture_last = 0;
|
||||
cb.offset_vector = offsets;
|
||||
|
@ -2621,13 +2625,28 @@ for (;;)
|
|||
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
||||
cb.pattern_position = GET(code, LINK_SIZE + 3);
|
||||
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
|
||||
cb.pattern_position = GET(code, LINK_SIZE + 2);
|
||||
cb.next_item_length = GET(code, LINK_SIZE + 2 + LINK_SIZE);
|
||||
|
||||
if (code[LINK_SIZE + 1] == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = code[2 + 3*LINK_SIZE];
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = code + (2 + 4*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
||||
return rrc; /* Abandon */
|
||||
}
|
||||
if (rrc > 0) break; /* Fail this thread */
|
||||
code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
|
||||
code += callout_length; /* Skip callout data */
|
||||
}
|
||||
|
||||
condcode = code[LINK_SIZE+1];
|
||||
|
@ -2954,27 +2973,47 @@ for (;;)
|
|||
/* Handle callouts */
|
||||
|
||||
case OP_CALLOUT:
|
||||
rrc = 0;
|
||||
if (mb->callout != NULL)
|
||||
case OP_CALLOUT_STR:
|
||||
{
|
||||
pcre2_callout_block cb;
|
||||
cb.version = 0;
|
||||
cb.callout_number = code[1];
|
||||
cb.capture_top = 1;
|
||||
cb.capture_last = 0;
|
||||
cb.offset_vector = offsets;
|
||||
cb.mark = NULL; /* No (*MARK) support */
|
||||
cb.subject = start_subject;
|
||||
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
||||
cb.pattern_position = GET(code, 2);
|
||||
cb.next_item_length = GET(code, 2 + LINK_SIZE);
|
||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
||||
return rrc; /* Abandon */
|
||||
unsigned int callout_length = (*code == OP_CALLOUT)
|
||||
? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 1 + 2*LINK_SIZE);
|
||||
rrc = 0;
|
||||
|
||||
if (mb->callout != NULL)
|
||||
{
|
||||
pcre2_callout_block cb;
|
||||
cb.version = 1;
|
||||
cb.capture_top = 1;
|
||||
cb.capture_last = 0;
|
||||
cb.offset_vector = offsets;
|
||||
cb.mark = NULL; /* No (*MARK) support */
|
||||
cb.subject = start_subject;
|
||||
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
||||
cb.pattern_position = GET(code, 1);
|
||||
cb.next_item_length = GET(code, 1 + LINK_SIZE);
|
||||
|
||||
if (*code == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = code[1 + 2*LINK_SIZE];
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = code + (1 + 3*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
||||
return rrc; /* Abandon */
|
||||
}
|
||||
if (rrc == 0)
|
||||
{ ADD_ACTIVE(state_offset + callout_length, 0); }
|
||||
}
|
||||
if (rrc == 0)
|
||||
{ ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
|
||||
break;
|
||||
|
||||
|
||||
|
|
|
@ -161,6 +161,8 @@ static const char compile_error_texts[] =
|
|||
"syntax error in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
"missing terminating delimiter for callout with string argument\0"
|
||||
"unrecognized string delimiter follows (?C\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -1477,84 +1477,85 @@ enum {
|
|||
OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */
|
||||
OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 118 Call out to external function if provided */
|
||||
OP_CALLOUT_STR, /* 119 Call out with string argument */
|
||||
|
||||
OP_ALT, /* 119 Start of alternation */
|
||||
OP_KET, /* 120 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 121 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 122 order. They are for groups that repeat for ever. */
|
||||
OP_KETRPOS, /* 123 Possessive unlimited repeat. */
|
||||
OP_ALT, /* 120 Start of alternation */
|
||||
OP_KET, /* 121 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 122 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 123 order. They are for groups that repeat for ever. */
|
||||
OP_KETRPOS, /* 124 Possessive unlimited repeat. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
|
||||
asserts must remain in order. */
|
||||
|
||||
OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 125 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 126 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 127 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */
|
||||
OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 126 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 127 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 128 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
|
||||
|
||||
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
|
||||
after the assertions, with ONCE first, as there's a test for >= ONCE for a
|
||||
subpattern that isn't an assertion. The POS versions must immediately follow
|
||||
the non-POS versions in each case. */
|
||||
|
||||
OP_ONCE, /* 129 Atomic group, contains captures */
|
||||
OP_ONCE_NC, /* 130 Atomic group containing no captures */
|
||||
OP_BRA, /* 131 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 133 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 135 Conditional group */
|
||||
OP_ONCE, /* 130 Atomic group, contains captures */
|
||||
OP_ONCE_NC, /* 131 Atomic group containing no captures */
|
||||
OP_BRA, /* 132 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 134 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 136 Conditional group */
|
||||
|
||||
/* These five must follow the previous five, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 138 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 140 Conditional group, check empty */
|
||||
OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 139 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 141 Conditional group, check empty */
|
||||
|
||||
/* The next two pairs must (respectively) be kept together. */
|
||||
|
||||
OP_CREF, /* 141 Used to hold a capture number as condition */
|
||||
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 143 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
|
||||
OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
|
||||
OP_TRUE, /* 146 Always true (used by VERSION) */
|
||||
OP_CREF, /* 142 Used to hold a capture number as condition */
|
||||
OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 144 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
|
||||
OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
|
||||
OP_TRUE, /* 147 Always true (used by VERSION) */
|
||||
|
||||
OP_BRAZERO, /* 147 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 148 order. */
|
||||
OP_BRAPOSZERO, /* 149 */
|
||||
OP_BRAZERO, /* 148 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 149 order. */
|
||||
OP_BRAPOSZERO, /* 150 */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_MARK, /* 150 always has an argument */
|
||||
OP_PRUNE, /* 151 */
|
||||
OP_PRUNE_ARG, /* 152 same, but with argument */
|
||||
OP_SKIP, /* 153 */
|
||||
OP_SKIP_ARG, /* 154 same, but with argument */
|
||||
OP_THEN, /* 155 */
|
||||
OP_THEN_ARG, /* 156 same, but with argument */
|
||||
OP_COMMIT, /* 157 */
|
||||
OP_MARK, /* 151 always has an argument */
|
||||
OP_PRUNE, /* 152 */
|
||||
OP_PRUNE_ARG, /* 153 same, but with argument */
|
||||
OP_SKIP, /* 154 */
|
||||
OP_SKIP_ARG, /* 155 same, but with argument */
|
||||
OP_THEN, /* 156 */
|
||||
OP_THEN_ARG, /* 157 same, but with argument */
|
||||
OP_COMMIT, /* 158 */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
|
||||
OP_FAIL, /* 158 */
|
||||
OP_ACCEPT, /* 159 */
|
||||
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
|
||||
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
|
||||
OP_FAIL, /* 159 */
|
||||
OP_ACCEPT, /* 160 */
|
||||
OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
|
||||
OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 162 */
|
||||
OP_SKIPZERO, /* 163 */
|
||||
|
||||
/* This is used to identify a DEFINE group during compilation so that it can
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
compilation finishes. */
|
||||
|
||||
OP_DEFINE, /* 163 */
|
||||
OP_DEFINE, /* 164 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
|
@ -1598,7 +1599,7 @@ some cases doesn't actually use these names at all). */
|
|||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
|
||||
"Recurse", "Callout", \
|
||||
"Recurse", "Callout", "CalloutStr", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
||||
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
|
||||
"Once", "Once_NC", \
|
||||
|
@ -1672,7 +1673,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||
1+2*IMM2_SIZE, /* DNREF */ \
|
||||
1+2*IMM2_SIZE, /* DNREFI */ \
|
||||
1+LINK_SIZE, /* RECURSE */ \
|
||||
2+2*LINK_SIZE, /* CALLOUT */ \
|
||||
1+2*LINK_SIZE+1, /* CALLOUT */ \
|
||||
0, /* CALLOUT_STR - variable length */ \
|
||||
1+LINK_SIZE, /* Alt */ \
|
||||
1+LINK_SIZE, /* Ket */ \
|
||||
1+LINK_SIZE, /* KetRmax */ \
|
||||
|
@ -1806,6 +1808,8 @@ extern const uint8_t PRIV(utf8_table4)[];
|
|||
#endif
|
||||
|
||||
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
|
||||
#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_)
|
||||
#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_)
|
||||
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
|
||||
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
|
||||
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
|
||||
|
@ -1824,6 +1828,8 @@ extern const uint8_t PRIV(utf8_table4)[];
|
|||
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
|
||||
|
||||
extern const uint8_t PRIV(OP_lengths)[];
|
||||
extern const uint32_t PRIV(callout_end_delims)[];
|
||||
extern const uint32_t PRIV(callout_start_delims)[];
|
||||
extern const pcre2_compile_context PRIV(default_compile_context);
|
||||
extern const pcre2_match_context PRIV(default_match_context);
|
||||
extern const uint8_t PRIV(default_tables)[];
|
||||
|
|
|
@ -771,6 +771,9 @@ switch(*cc)
|
|||
#endif
|
||||
return cc + 1;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
return cc + GET(cc, 1 + 2*LINK_SIZE);
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
case OP_XCLASS:
|
||||
return cc + GET(cc, 1);
|
||||
|
@ -821,7 +824,7 @@ while (cc < ccend)
|
|||
case OP_SCOND:
|
||||
/* Only AUTO_CALLOUT can insert this opcode. We do
|
||||
not intend to support this case. */
|
||||
if (cc[1 + LINK_SIZE] == OP_CALLOUT)
|
||||
if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
|
||||
return FALSE;
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
@ -855,12 +858,13 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
case OP_CALLOUT_STR:
|
||||
if (common->capture_last_ptr == 0)
|
||||
{
|
||||
common->capture_last_ptr = common->ovector_start;
|
||||
common->ovector_start += sizeof(sljit_sw);
|
||||
}
|
||||
cc += 2 + 2 * LINK_SIZE;
|
||||
cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
case OP_THEN_ARG:
|
||||
|
@ -6296,7 +6300,7 @@ uint32_t i;
|
|||
if (arguments->callout == NULL)
|
||||
return 0;
|
||||
|
||||
callout_block->version = 0;
|
||||
callout_block->version = 1;
|
||||
|
||||
/* Offsets in subject. */
|
||||
callout_block->subject_length = arguments->end - arguments->begin;
|
||||
|
@ -6333,6 +6337,10 @@ static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *com
|
|||
DEFINE_COMPILER;
|
||||
backtrack_common *backtrack;
|
||||
sljit_si mov_opcode;
|
||||
unsigned int callout_length = (*cc == OP_CALLOUT)
|
||||
? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
|
||||
sljit_sw value1;
|
||||
sljit_sw value2;
|
||||
|
||||
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
|
||||
|
||||
|
@ -6341,7 +6349,8 @@ allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
|
|||
SLJIT_ASSERT(common->capture_last_ptr != 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
|
||||
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
|
||||
OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
|
||||
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
|
||||
OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
|
||||
OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
|
||||
|
||||
/* These pointer-sized fields temporarily store internal variables. */
|
||||
|
@ -6352,8 +6361,22 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
|
|||
if (common->mark_ptr != 0)
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
|
||||
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_UI : SLJIT_MOV;
|
||||
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
|
||||
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
|
||||
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
|
||||
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
|
||||
|
||||
if (*cc == OP_CALLOUT)
|
||||
{
|
||||
value1 = 0;
|
||||
value2 = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1);
|
||||
value2 = (callout_length - (1 + 3*LINK_SIZE + 2));
|
||||
}
|
||||
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
|
||||
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
|
||||
|
||||
/* Needed to save important temporary registers. */
|
||||
|
@ -6372,7 +6395,7 @@ if (common->forced_quit_label == NULL)
|
|||
add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
|
||||
else
|
||||
JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
|
||||
return cc + 2 + 2 * LINK_SIZE;
|
||||
return cc + callout_length;
|
||||
}
|
||||
|
||||
#undef CALLOUT_ARG_SIZE
|
||||
|
@ -8377,6 +8400,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
case OP_CALLOUT_STR:
|
||||
cc = compile_callout_matchingpath(common, cc, parent);
|
||||
break;
|
||||
|
||||
|
@ -9561,6 +9585,7 @@ while (current)
|
|||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
case OP_CALLOUT_STR:
|
||||
case OP_FAIL:
|
||||
case OP_ACCEPT:
|
||||
case OP_ASSERT_ACCEPT:
|
||||
|
|
|
@ -1310,13 +1310,15 @@ for (;;)
|
|||
/* Because of the way auto-callout works during compile, a callout item is
|
||||
inserted between OP_COND and an assertion condition. */
|
||||
|
||||
if (*ecode == OP_CALLOUT)
|
||||
if (*ecode == OP_CALLOUT || *ecode == OP_CALLOUT_STR)
|
||||
{
|
||||
unsigned int callout_length = (*ecode == OP_CALLOUT)
|
||||
? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
|
||||
|
||||
if (mb->callout != NULL)
|
||||
{
|
||||
pcre2_callout_block cb;
|
||||
cb.version = 0;
|
||||
cb.callout_number = ecode[1];
|
||||
cb.version = 1;
|
||||
cb.capture_top = offset_top/2;
|
||||
cb.capture_last = mb->capture_last & CAPLMASK;
|
||||
cb.offset_vector = mb->ovector;
|
||||
|
@ -1325,8 +1327,23 @@ for (;;)
|
|||
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
||||
cb.pattern_position = GET(ecode, 2);
|
||||
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||
cb.pattern_position = GET(ecode, 1);
|
||||
cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
|
||||
|
||||
if (*ecode == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (rrc < 0) RRETURN(rrc);
|
||||
|
@ -1335,8 +1352,8 @@ for (;;)
|
|||
/* Advance ecode past the callout, so it now points to the condition. We
|
||||
must adjust codelink so that the value of ecode+codelink is unchanged. */
|
||||
|
||||
ecode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
codelink -= PRIV(OP_lengths)[OP_CALLOUT];
|
||||
ecode += callout_length;
|
||||
codelink -= callout_length;
|
||||
}
|
||||
|
||||
/* Test the various possible conditions */
|
||||
|
@ -1716,26 +1733,47 @@ for (;;)
|
|||
function is able to force a failure. */
|
||||
|
||||
case OP_CALLOUT:
|
||||
if (mb->callout != NULL)
|
||||
case OP_CALLOUT_STR:
|
||||
{
|
||||
pcre2_callout_block cb;
|
||||
cb.version = 0;
|
||||
cb.callout_number = ecode[1];
|
||||
cb.capture_top = offset_top/2;
|
||||
cb.capture_last = mb->capture_last & CAPLMASK;
|
||||
cb.offset_vector = mb->ovector;
|
||||
cb.mark = mb->nomatch_mark;
|
||||
cb.subject = mb->start_subject;
|
||||
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
||||
cb.pattern_position = GET(ecode, 2);
|
||||
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (rrc < 0) RRETURN(rrc);
|
||||
unsigned int callout_length = (*ecode == OP_CALLOUT)
|
||||
? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
|
||||
|
||||
if (mb->callout != NULL)
|
||||
{
|
||||
pcre2_callout_block cb;
|
||||
cb.version = 1;
|
||||
cb.callout_number = ecode[LINK_SIZE + 1];
|
||||
cb.capture_top = offset_top/2;
|
||||
cb.capture_last = mb->capture_last & CAPLMASK;
|
||||
cb.offset_vector = mb->ovector;
|
||||
cb.mark = mb->nomatch_mark;
|
||||
cb.subject = mb->start_subject;
|
||||
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
||||
cb.pattern_position = GET(ecode, 1);
|
||||
cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
|
||||
|
||||
if (*ecode == OP_CALLOUT)
|
||||
{
|
||||
cb.callout_number = ecode[1 + 2*LINK_SIZE];
|
||||
cb.callout_string = NULL;
|
||||
cb.callout_string_length = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
|
||||
cb.callout_string_length =
|
||||
callout_length - (1 + 3*LINK_SIZE) - 2;
|
||||
}
|
||||
|
||||
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (rrc < 0) RRETURN(rrc);
|
||||
}
|
||||
ecode += callout_length;
|
||||
}
|
||||
ecode += 2 + 2*LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Recursion either matches the current regex, or some subexpression. The
|
||||
|
|
|
@ -305,6 +305,7 @@ for(;;)
|
|||
{
|
||||
PCRE2_SPTR ccode;
|
||||
uint32_t c;
|
||||
int i;
|
||||
const char *flag = " ";
|
||||
unsigned int extra = 0;
|
||||
|
||||
|
@ -594,8 +595,23 @@ for(;;)
|
|||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
|
||||
GET(code, 2 + LINK_SIZE));
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
|
||||
GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
c = code[1 + 3*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring(f, code + 2 + 3*LINK_SIZE);
|
||||
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
c = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
|
@ -611,7 +627,6 @@ for(;;)
|
|||
case OP_NCLASS:
|
||||
case OP_XCLASS:
|
||||
{
|
||||
int i;
|
||||
unsigned int min, max;
|
||||
BOOL printmap;
|
||||
BOOL invertmap = FALSE;
|
||||
|
|
|
@ -199,6 +199,10 @@ for (;;)
|
|||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
/* Skip over a subpattern that has a {0} or {0,x} quantifier */
|
||||
|
||||
case OP_BRAZERO:
|
||||
|
@ -935,7 +939,11 @@ do
|
|||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += 2 + 2*LINK_SIZE;
|
||||
tcode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
tcode += GET(tcode, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind and negative lookahead assertions */
|
||||
|
|
|
@ -66,6 +66,20 @@ adding to classes. */
|
|||
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||
|
||||
/* These tables are the pairs of delimiters that are valid for callout string
|
||||
arguments. For each starting delimiter there must be a matching ending
|
||||
delimiter, which in fact is different only for bracket-like delimiters. */
|
||||
|
||||
const uint32_t PRIV(callout_start_delims)[] = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
|
||||
|
||||
const uint32_t PRIV(callout_end_delims[]) = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
|
|
|
@ -4519,9 +4519,9 @@ return capcount;
|
|||
/* Called from a PCRE2 library as a result of the (?C) item. We print out where
|
||||
we are in the match. Yield zero unless more callouts than the fail count, or
|
||||
the callout data is not zero. The only differences in the callout block for
|
||||
different code unit widths are that the pointers to the subject and the most
|
||||
recent MARK point to strings of the appropriate width. Casts can be used to
|
||||
deal with this.
|
||||
different code unit widths are that the pointers to the subject, the most
|
||||
recent MARK, and a callout argument string point to strings of the appropriate
|
||||
width. Casts can be used to deal with this.
|
||||
|
||||
Argument: a pointer to a callout block
|
||||
Return:
|
||||
|
@ -4535,11 +4535,31 @@ BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
|
|||
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
|
||||
FILE *f = (first_callout || callout_capture)? outfile : NULL;
|
||||
|
||||
/* For a callout with a string argument, show the string first because there
|
||||
isn't a tidy way to fit it in the rest of the data. */
|
||||
|
||||
if (cb->callout_string != NULL)
  {
  /* Fetch the code unit at index -1 relative to the string; presumably this is
  the opening delimiter stored immediately before the text (confirm against the
  compiled OP_CALLOUT_STR layout). */

  uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);

  /* All of this output goes to outfile, not f. The variable f is NULL unless
  this is the first callout or callout_capture is set, and a NULL stream must
  never be handed to fprintf(). The string itself and the closing delimiter
  were already being written to outfile unconditionally. */

  fprintf(outfile, "Callout: %c", delimiter);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);

  /* Translate the opening delimiter into its closing partner. If it is not
  found in the table, the opening delimiter is re-used unchanged. */

  for (i = 0; callout_start_delims[i] != 0; i++)
    if (delimiter == callout_start_delims[i])
      {
      delimiter = callout_end_delims[i];
      break;
      }
  fprintf(outfile, "%c", delimiter);

  /* With callout_capture, the "last capture" text follows on the same line, so
  the newline is written only when captures are not being shown. */

  if (!callout_capture) fprintf(outfile, "\n");
  }
|
||||
|
||||
/* Show captured strings if required */
|
||||
|
||||
if (callout_capture)
|
||||
{
|
||||
fprintf(f, "Callout %d: last capture = %d\n",
|
||||
cb->callout_number, cb->capture_last);
|
||||
|
||||
if (cb->callout_string == NULL) fprintf(f, "Callout %d:", cb->callout_number);
|
||||
fprintf(f, " last capture = %d\n", cb->capture_last);
|
||||
for (i = 0; i < cb->capture_top * 2; i += 2)
|
||||
{
|
||||
fprintf(f, "%2d: ", i/2);
|
||||
|
@ -4553,7 +4573,7 @@ if (callout_capture)
|
|||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Re-print the subject in canonical form, the first time or if giving full
|
||||
details. On subsequent calls in the same match, we use pchars just to find the
|
||||
printed lengths of the substrings. */
|
||||
|
@ -4572,19 +4592,22 @@ PCHARSV(cb->subject, cb->current_position,
|
|||
|
||||
if (f != NULL) fprintf(f, "\n");
|
||||
|
||||
/* Always print appropriate indicators, with callout number if not already
|
||||
shown. For automatic callouts, show the pattern offset. */
|
||||
/* For automatic callouts, show the pattern offset. Otherwise, for a numerical
|
||||
callout whose number has not already been shown with captured strings, show the
|
||||
number here. A callout with a string argument has been displayed above. */
|
||||
|
||||
if (cb->callout_number == 255)
|
||||
{
|
||||
fprintf(outfile, "%+3d ", (int)cb->pattern_position);
|
||||
if (cb->pattern_position > 99) fprintf(outfile, "\n ");
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
if (callout_capture) fprintf(outfile, " ");
|
||||
if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
|
||||
else fprintf(outfile, "%3d ", cb->callout_number);
|
||||
}
|
||||
|
||||
/* Now show position indicators */
|
||||
|
||||
for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
|
||||
fprintf(outfile, "^");
|
||||
|
|
|
@ -4178,4 +4178,32 @@ a random value. /Ix
|
|||
|
||||
/((?+1)(\1))/B
|
||||
|
||||
# Callouts with string arguments
|
||||
|
||||
/a(?C"/
|
||||
|
||||
/a(?C"a/
|
||||
|
||||
/a(?C"a"/
|
||||
|
||||
/a(?C"a"bcde(?C"b")xyz/
|
||||
|
||||
/a(?C"a)b""c")/B
|
||||
|
||||
/ab(?C" any text with spaces ")cde/B
|
||||
abcde
|
||||
12abcde
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef
|
||||
|
||||
/^a(b)c(?C"AB")def/
|
||||
abcdef
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef\=callout_capture
|
||||
|
||||
/^a(b)c(?C{AB})def/B
|
||||
abcdef\=callout_capture
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -4811,4 +4811,20 @@
|
|||
/a(b)c(d)/
|
||||
abc\=ph,copy=0,copy=1,getall
|
||||
|
||||
/ab(?C" any text with spaces ")cde/B
|
||||
abcde
|
||||
12abcde
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef
|
||||
|
||||
/^a(b)c(?C"AB")def/
|
||||
abcdef
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef\=callout_capture
|
||||
|
||||
/^a(b)c(?C{AB})def/B
|
||||
abcdef\=callout_capture
|
||||
|
||||
# End of testinput6
|
||||
|
|
|
@ -3538,7 +3538,7 @@ Subject length lower bound = 2
|
|||
Failed: error 138 at offset 6: number after (?C is greater than 255
|
||||
|
||||
/(?Cab)xx/I
|
||||
Failed: error 139 at offset 3: closing parenthesis for (?C expected
|
||||
Failed: error 182 at offset 3: unrecognized string delimiter follows (?C
|
||||
|
||||
/(?C12vr)x/I
|
||||
Failed: error 139 at offset 5: closing parenthesis for (?C expected
|
||||
|
@ -13969,4 +13969,95 @@ Matched, but too many substrings
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# Callouts with string arguments
|
||||
|
||||
/a(?C"/
|
||||
Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument
|
||||
|
||||
/a(?C"a/
|
||||
Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument
|
||||
|
||||
/a(?C"a"/
|
||||
Failed: error 139 at offset 7: closing parenthesis for (?C expected
|
||||
|
||||
/a(?C"a"bcde(?C"b")xyz/
|
||||
Failed: error 139 at offset 7: closing parenthesis for (?C expected
|
||||
|
||||
/a(?C"a)b""c")/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a
|
||||
CalloutStr "a)b"c" 13 0
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/ab(?C" any text with spaces ")cde/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
ab
|
||||
CalloutStr " any text with spaces " 30 1
|
||||
cde
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcde
|
||||
Callout: " any text with spaces "
|
||||
--->abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
12abcde
|
||||
Callout: " any text with spaces "
|
||||
--->12abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef
|
||||
--->abcdef
|
||||
1 ^ ^ d
|
||||
0: abcdef
|
||||
1: b
|
||||
|
||||
/^a(b)c(?C"AB")def/
|
||||
abcdef
|
||||
Callout: "AB"
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
1: b
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef\=callout_capture
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: b
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
1: b
|
||||
|
||||
/^a(b)c(?C{AB})def/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
a
|
||||
CBra 1
|
||||
b
|
||||
Ket
|
||||
c
|
||||
CalloutStr {AB} 14 1
|
||||
def
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcdef\=callout_capture
|
||||
Callout: {AB} last capture = 1
|
||||
0: <unset>
|
||||
1: b
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
1: b
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -7773,4 +7773,66 @@ Partial match: abc
|
|||
Copy substring 1 failed (-2): partial match
|
||||
get substring list failed (-2): partial match
|
||||
|
||||
/ab(?C" any text with spaces ")cde/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
ab
|
||||
CalloutStr " any text with spaces " 30 1
|
||||
cde
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcde
|
||||
Callout: " any text with spaces "
|
||||
--->abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
12abcde
|
||||
Callout: " any text with spaces "
|
||||
--->12abcde
|
||||
^ ^ c
|
||||
0: abcde
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef
|
||||
--->abcdef
|
||||
1 ^ ^ d
|
||||
0: abcdef
|
||||
|
||||
/^a(b)c(?C"AB")def/
|
||||
abcdef
|
||||
Callout: "AB"
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
|
||||
/^a(b)c(?C1)def/
|
||||
abcdef\=callout_capture
|
||||
Callout 1: last capture = 0
|
||||
0:
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
|
||||
/^a(b)c(?C{AB})def/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
a
|
||||
CBra 1
|
||||
b
|
||||
Ket
|
||||
c
|
||||
CalloutStr {AB} 14 1
|
||||
def
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
abcdef\=callout_capture
|
||||
Callout: {AB} last capture = 0
|
||||
0:
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
|
||||
# End of testinput6
|
||||
|
|
Loading…
Reference in New Issue