Support manual callout with string argument at start of condition.

This commit is contained in:
Philip.Hazel 2015-03-12 17:00:18 +00:00
parent e43b3d435b
commit baf08a3d37
5 changed files with 181 additions and 31 deletions

View File

@ -5219,9 +5219,39 @@ for (;; ptr++)
if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C) if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
{ {
for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; if (IS_DIGIT(ptr[3]) || ptr[3] == CHAR_RIGHT_PARENTHESIS)
if (ptr[i] == CHAR_RIGHT_PARENTHESIS) {
tempptr += i + 1; for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
tempptr += i + 1;
}
else
{
uint32_t delimiter = 0;
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
{
if (ptr[3] == PRIV(callout_start_delims)[i])
{
delimiter = PRIV(callout_end_delims)[i];
break;
}
}
if (delimiter != 0)
{
for (i = 4; ptr + i < cb->end_pattern; i++)
{
if (ptr[i] == delimiter)
{
if (ptr[i+1] == delimiter) i++;
else
{
if (ptr[i+1] == CHAR_RIGHT_PARENTHESIS) tempptr += i + 2;
break;
}
}
}
}
}
} }
/* For conditions that are assertions, check the syntax, and then exit /* For conditions that are assertions, check the syntax, and then exit
@ -5574,34 +5604,34 @@ for (;; ptr++)
previous_callout = code; /* Save for later completion */ previous_callout = code; /* Save for later completion */
after_manual_callout = 1; /* Skip one item before completing */ after_manual_callout = 1; /* Skip one item before completing */
ptr++; /* Character after (?C */ ptr++; /* Character after (?C */
/* A callout may have a string argument, delimited by one of a fixed /* A callout may have a string argument, delimited by one of a fixed
number of characters, or an undelimited numerical argument, or no number of characters, or an undelimited numerical argument, or no
argument, which is the same as (?C0). Different opcodes are used for argument, which is the same as (?C0). Different opcodes are used for
the two cases. */ the two cases. */
if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr)) if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr))
{ {
uint32_t delimiter = 0; uint32_t delimiter = 0;
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
{ {
if (*ptr == PRIV(callout_start_delims)[i]) if (*ptr == PRIV(callout_start_delims)[i])
{ {
delimiter = PRIV(callout_end_delims)[i]; delimiter = PRIV(callout_end_delims)[i];
break; break;
} }
} }
if (delimiter == 0) if (delimiter == 0)
{ {
*errorcodeptr = ERR82; *errorcodeptr = ERR82;
goto FAILED; goto FAILED;
} }
/* During the pre-compile phase, we parse the string and update the /* During the pre-compile phase, we parse the string and update the
length. There is no need to generate any code. */ length. There is no need to generate any code. */
if (lengthptr != NULL) /* Only check the string */ if (lengthptr != NULL) /* Only check the string */
{ {
PCRE2_SPTR start = ptr; PCRE2_SPTR start = ptr;
@ -5610,25 +5640,25 @@ for (;; ptr++)
if (++ptr >= cb->end_pattern) if (++ptr >= cb->end_pattern)
{ {
*errorcodeptr = ERR81; *errorcodeptr = ERR81;
ptr = start; /* To give a more useful message */ ptr = start; /* To give a more useful message */
goto FAILED; goto FAILED;
} }
if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2; if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
} }
while (ptr[0] != delimiter); while (ptr[0] != delimiter);
/* Start points to the opening delimiter, ptr points to the /* Start points to the opening delimiter, ptr points to the
closing delimiter. We must allow for including the delimiter and closing delimiter. We must allow for including the delimiter and
for the terminating zero. Any doubled delimiters within the string for the terminating zero. Any doubled delimiters within the string
make this an overestimate, but it is not worth bothering about. */ make this an overestimate, but it is not worth bothering about. */
(*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE); (*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE);
} }
/* In the real compile we can copy the string, knowing that it is /* In the real compile we can copy the string, knowing that it is
syntactically OK. The starting delimiter is included so that the syntactically OK. The starting delimiter is included so that the
client can discover it if they want. */ client can discover it if they want. */
else else
{ {
PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE); PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE);
@ -5638,7 +5668,7 @@ for (;; ptr++)
if (*ptr == delimiter) if (*ptr == delimiter)
{ {
if (ptr[1] == delimiter) ptr++; else break; if (ptr[1] == delimiter) ptr++; else break;
} }
*callout_string++ = *ptr++; *callout_string++ = *ptr++;
} }
*callout_string++ = CHAR_NULL; *callout_string++ = CHAR_NULL;
@ -5649,16 +5679,16 @@ for (;; ptr++)
(int)(callout_string - code)); (int)(callout_string - code));
code = callout_string; code = callout_string;
} }
/* Advance to what should be the closing parenthesis, which is /* Advance to what should be the closing parenthesis, which is
checked below. */ checked below. */
ptr++; ptr++;
} }
/* Handle a callout with an optional numerical argument, which must be /* Handle a callout with an optional numerical argument, which must be
less than or equal to 255. A missing argument gives 0. */ less than or equal to 255. A missing argument gives 0. */
else else
{ {
int n = 0; int n = 0;
@ -5677,9 +5707,9 @@ for (;; ptr++)
code[1 + 2*LINK_SIZE] = n; /* Callout number */ code[1 + 2*LINK_SIZE] = n; /* Callout number */
code += PRIV(OP_lengths)[OP_CALLOUT]; code += PRIV(OP_lengths)[OP_CALLOUT];
} }
/* Both formats must have a closing parenthesis */ /* Both formats must have a closing parenthesis */
if (*ptr != CHAR_RIGHT_PARENTHESIS) if (*ptr != CHAR_RIGHT_PARENTHESIS)
{ {
*errorcodeptr = ERR39; *errorcodeptr = ERR39;
@ -5687,7 +5717,7 @@ for (;; ptr++)
} }
/* Callouts cannot be quantified. */ /* Callouts cannot be quantified. */
previous = NULL; previous = NULL;
continue; continue;

8
testdata/testinput2 vendored
View File

@ -4210,4 +4210,12 @@ a random value. /Ix
/(?:a(?C`code`)){3}/B /(?:a(?C`code`)){3}/B
/^(?(?C25)(?=abc)abcd|xyz)/B
abcdefg
xyz123
/^(?(?C$abc$)(?=abc)abcd|xyz)/B
abcdefg
xyz123
# End of testinput2 # End of testinput2

8
testdata/testinput6 vendored
View File

@ -4827,4 +4827,12 @@
/^a(b)c(?C{AB})def/B /^a(b)c(?C{AB})def/B
abcdef\=callout_capture abcdef\=callout_capture
/^(?(?C25)(?=abc)abcd|xyz)/B
abcdefg
xyz123
/^(?(?C$abc$)(?=abc)abcd|xyz)/B
abcdefg
xyz123
# End of testinput6 # End of testinput6

52
testdata/testoutput2 vendored
View File

@ -14094,4 +14094,56 @@ Callout: {AB} last capture = 1
End End
------------------------------------------------------------------ ------------------------------------------------------------------
/^(?(?C25)(?=abc)abcd|xyz)/B
------------------------------------------------------------------
Bra
^
Cond
Callout 25 9 7
Assert
abc
Ket
abcd
Alt
xyz
Ket
Ket
End
------------------------------------------------------------------
abcdefg
--->abcdefg
25 ^ (?=abc)
0: abcd
xyz123
--->xyz123
25 ^ (?=abc)
0: xyz
/^(?(?C$abc$)(?=abc)abcd|xyz)/B
------------------------------------------------------------------
Bra
^
Cond
CalloutStr $abc$ 12 7
Assert
abc
Ket
abcd
Alt
xyz
Ket
Ket
End
------------------------------------------------------------------
abcdefg
Callout: $abc$
--->abcdefg
^ (?=abc)
0: abcd
xyz123
Callout: $abc$
--->xyz123
^ (?=abc)
0: xyz
# End of testinput2 # End of testinput2

52
testdata/testoutput6 vendored
View File

@ -7835,4 +7835,56 @@ Callout: {AB} last capture = 0
^ ^ d ^ ^ d
0: abcdef 0: abcdef
/^(?(?C25)(?=abc)abcd|xyz)/B
------------------------------------------------------------------
Bra
^
Cond
Callout 25 9 7
Assert
abc
Ket
abcd
Alt
xyz
Ket
Ket
End
------------------------------------------------------------------
abcdefg
--->abcdefg
25 ^ (?=abc)
0: abcd
xyz123
--->xyz123
25 ^ (?=abc)
0: xyz
/^(?(?C$abc$)(?=abc)abcd|xyz)/B
------------------------------------------------------------------
Bra
^
Cond
CalloutStr $abc$ 12 7
Assert
abc
Ket
abcd
Alt
xyz
Ket
Ket
End
------------------------------------------------------------------
abcdefg
Callout: $abc$
--->abcdefg
^ (?=abc)
0: abcd
xyz123
Callout: $abc$
--->xyz123
^ (?=abc)
0: xyz
# End of testinput6 # End of testinput6