From baf08a3d375b5eac84b8900a0fbf790d5afef1bb Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Thu, 12 Mar 2015 17:00:18 +0000 Subject: [PATCH] Support manual callout with string argument at start of condition. --- src/pcre2_compile.c | 92 +++++++++++++++++++++++++++++--------------- testdata/testinput2 | 8 ++++ testdata/testinput6 | 8 ++++ testdata/testoutput2 | 52 +++++++++++++++++++++++++ testdata/testoutput6 | 52 +++++++++++++++++++++++++ 5 files changed, 181 insertions(+), 31 deletions(-) diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 331e60d..bb4d97a 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -5219,9 +5219,39 @@ for (;; ptr++) if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C) { - for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; - if (ptr[i] == CHAR_RIGHT_PARENTHESIS) - tempptr += i + 1; + if (IS_DIGIT(ptr[3]) || ptr[3] == CHAR_RIGHT_PARENTHESIS) + { + for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; + if (ptr[i] == CHAR_RIGHT_PARENTHESIS) + tempptr += i + 1; + } + else + { + uint32_t delimiter = 0; + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + { + if (ptr[3] == PRIV(callout_start_delims)[i]) + { + delimiter = PRIV(callout_end_delims)[i]; + break; + } + } + if (delimiter != 0) + { + for (i = 4; ptr + i < cb->end_pattern; i++) + { + if (ptr[i] == delimiter) + { + if (ptr[i+1] == delimiter) i++; + else + { + if (ptr[i+1] == CHAR_RIGHT_PARENTHESIS) tempptr += i + 2; + break; + } + } + } + } + } } /* For conditions that are assertions, check the syntax, and then exit @@ -5574,34 +5604,34 @@ for (;; ptr++) previous_callout = code; /* Save for later completion */ after_manual_callout = 1; /* Skip one item before completing */ ptr++; /* Character after (?C */ - - /* A callout may have a string argument, delimited by one of a fixed + + /* A callout may have a string argument, delimited by one of a fixed number of characters, or an undelimited numerical argument, or no argument, which is the same as (?C0). Different opcodes are used for the two cases. */ - + if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr)) - { + { uint32_t delimiter = 0; - + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) { if (*ptr == PRIV(callout_start_delims)[i]) { delimiter = PRIV(callout_end_delims)[i]; - break; - } + break; + } } - + if (delimiter == 0) { *errorcodeptr = ERR82; - goto FAILED; - } + goto FAILED; + } /* During the pre-compile phase, we parse the string and update the length. There is no need to generate any code. */ - + if (lengthptr != NULL) /* Only check the string */ { PCRE2_SPTR start = ptr; @@ -5610,25 +5640,25 @@ for (;; ptr++) if (++ptr >= cb->end_pattern) { *errorcodeptr = ERR81; - ptr = start; /* To give a more useful message */ + ptr = start; /* To give a more useful message */ goto FAILED; } if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2; } while (ptr[0] != delimiter); - + /* Start points to the opening delimiter, ptr points to the - closing delimiter. We must allow for including the delimiter and + closing delimiter. We must allow for including the delimiter and for the terminating zero. Any doubled delimiters within the string make this an overestimate, but it is not worth bothering about. */ - + (*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE); } - + /* In the real compile we can copy the string, knowing that it is - syntactically OK. The starting delimiter is included so that the - client can discover it if they want. */ - + syntactically OK. The starting delimiter is included so that the + client can discover it if they want. */ + else { PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE); @@ -5638,7 +5668,7 @@ for (;; ptr++) if (*ptr == delimiter) { if (ptr[1] == delimiter) ptr++; else break; - } + } *callout_string++ = *ptr++; } *callout_string++ = CHAR_NULL; @@ -5649,16 +5679,16 @@ for (;; ptr++) (int)(callout_string - code)); code = callout_string; } - - /* Advance to what should be the closing parenthesis, which is + + /* Advance to what should be the closing parenthesis, which is checked below. */ - + ptr++; } - + /* Handle a callout with an optional numerical argument, which must be less than or equal to 255. A missing argument gives 0. */ - + else { int n = 0; @@ -5677,9 +5707,9 @@ for (;; ptr++) code[1 + 2*LINK_SIZE] = n; /* Callout number */ code += PRIV(OP_lengths)[OP_CALLOUT]; } - + /* Both formats must have a closing parenthesis */ - + if (*ptr != CHAR_RIGHT_PARENTHESIS) { *errorcodeptr = ERR39; @@ -5687,7 +5717,7 @@ for (;; ptr++) } /* Callouts cannot be quantified. */ - + previous = NULL; continue; diff --git a/testdata/testinput2 b/testdata/testinput2 index 4dc476f..9daeb29 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4210,4 +4210,12 @@ a random value. /Ix /(?:a(?C`code`)){3}/B +/^(?(?C25)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + # End of testinput2 diff --git a/testdata/testinput6 b/testdata/testinput6 index 60c69b7..342c8c5 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4827,4 +4827,12 @@ /^a(b)c(?C{AB})def/B abcdef\=callout_capture +/^(?(?C25)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + # End of testinput6 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 5a837cf..8a4a6a2 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14094,4 +14094,56 @@ Callout: {AB} last capture = 1 End ------------------------------------------------------------------ +/^(?(?C25)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + Callout 25 9 7 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +--->abcdefg + 25 ^ (?=abc) + 0: abcd + xyz123 +--->xyz123 + 25 ^ (?=abc) + 0: xyz + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + CalloutStr $abc$ 12 7 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +Callout: $abc$ +--->abcdefg + ^ (?=abc) + 0: abcd + xyz123 +Callout: $abc$ +--->xyz123 + ^ (?=abc) + 0: xyz + # End of testinput2 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 98054e1..8a93f84 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7835,4 +7835,56 @@ Callout: {AB} last capture = 0 ^ ^ d 0: abcdef +/^(?(?C25)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + Callout 25 9 7 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +--->abcdefg + 25 ^ (?=abc) + 0: abcd + xyz123 +--->xyz123 + 25 ^ (?=abc) + 0: xyz + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + CalloutStr $abc$ 12 7 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +Callout: $abc$ +--->abcdefg + ^ (?=abc) + 0: abcd + xyz123 +Callout: $abc$ +--->xyz123 + ^ (?=abc) + 0: xyz + # End of testinput6