From d0cf279d876dd740e072cc5e54722d3c24001e07 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Wed, 11 Mar 2015 17:44:16 +0000 Subject: [PATCH] Code for callouts with string arguments. Documentation not yet updated. --- ChangeLog | 6 ++ configure.ac | 6 +- src/pcre2.h.in | 3 + src/pcre2_auto_possess.c | 10 +++ src/pcre2_compile.c | 173 +++++++++++++++++++++++++++++++-------- src/pcre2_dfa_match.c | 89 ++++++++++++++------ src/pcre2_error.c | 2 + src/pcre2_internal.h | 100 +++++++++++----------- src/pcre2_jit_compile.c | 39 +++++++-- src/pcre2_match.c | 88 ++++++++++++++------ src/pcre2_printint.c | 21 ++++- src/pcre2_study.c | 10 ++- src/pcre2_tables.c | 14 ++++ src/pcre2test.c | 45 +++++++--- testdata/testinput2 | 28 +++++++ testdata/testinput6 | 16 ++++ testdata/testoutput2 | 93 ++++++++++++++++++++- testdata/testoutput6 | 62 ++++++++++++++ 18 files changed, 650 insertions(+), 155 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6354f4b..8e437d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,12 @@ Change Log for PCRE2 -------------------- +Version 10.20 xx-xx-2015 +------------------------ + +1. Callouts with string arguments have been added. + + Version 10.10 06-March-2015 --------------------------- diff --git a/configure.ac b/configure.ac index 8bb1ed2..00ddc40 100644 --- a/configure.ac +++ b/configure.ac @@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [10]) -m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2015-03-06]) +m4_define(pcre2_minor, [20]) +m4_define(pcre2_prerelease, [-RC1]) +m4_define(pcre2_date, [2015-03-11]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. 
diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 9555f5e..d2cc3d6 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -337,6 +337,9 @@ typedef struct pcre2_callout_block { \ PCRE2_SIZE current_position; /* Where we currently are in the subject */ \ PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \ PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \ + /* ------------------- Added for Version 1 -------------------------- */ \ + PCRE2_SPTR callout_string; /* String compiled into pattern */ \ + uint32_t callout_string_length; /* Length of string compiled into pattern */ \ /* ------------------------------------------------------------------ */ \ } pcre2_callout_block; diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index 15dd770..e25ec43 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -604,6 +604,12 @@ for(;;) continue; } + if (c == OP_CALLOUT_STR) + { + code += GET(code, 1 + 2*LINK_SIZE); + continue; + } + if (c == OP_ALT) { do code += GET(code, 1); while (*code == OP_ALT); @@ -1234,6 +1240,10 @@ for (;;) code += 2; break; + case OP_CALLOUT_STR: + code += GET(code, 1 + 2*LINK_SIZE); + break; + #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: code += GET(code, 1); diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index a8defd1..331e60d 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -573,7 +573,8 @@ enum { ERR0 = COMPILE_ERROR_BASE, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, - ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80 }; + ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, + ERR81, ERR82 }; /* This is a table of start-of-pattern options such as (*UTF) and settings such as (*LIMIT_MATCH=nnnn) and (*CRLF). 
For completeness and backward @@ -617,7 +618,6 @@ static pso pso_list[] = { { (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE } }; - /* This table is used when converting repeating opcodes into possessified versions as a result of an explicit possessive quantifier such as ++. A zero value means there is no possessified version - in those cases the item in @@ -730,11 +730,11 @@ Returns: new code pointer static PCRE2_UCHAR * auto_callout(PCRE2_UCHAR *code, PCRE2_SPTR ptr, compile_block *cb) { -*code++ = OP_CALLOUT; -*code++ = 255; -PUT(code, 0, ptr - cb->start_pattern); /* Pattern offset */ -PUT(code, LINK_SIZE, 0); /* Default length */ -return code + 2 * LINK_SIZE; +code[0] = OP_CALLOUT; +PUT(code, 1, ptr - cb->start_pattern); /* Pattern offset */ +PUT(code, 1 + LINK_SIZE, 0); /* Default length */ +code[1 + 2*LINK_SIZE] = 255; +return code + PRIV(OP_lengths)[OP_CALLOUT]; } @@ -759,8 +759,8 @@ static void complete_callout(PCRE2_UCHAR *previous_callout, PCRE2_SPTR ptr, compile_block *cb) { -size_t length = ptr - cb->start_pattern - GET(previous_callout, 2); -PUT(previous_callout, 2 + LINK_SIZE, length); +size_t length = ptr - cb->start_pattern - GET(previous_callout, 1); +PUT(previous_callout, 1 + LINK_SIZE, length); } @@ -909,6 +909,10 @@ for (;;) cc += PRIV(OP_lengths)[*cc]; break; + case OP_CALLOUT_STR: + cc += GET(cc, 1 + 2*LINK_SIZE); + break; + /* Handle literal characters */ case OP_CHAR: @@ -1157,6 +1161,10 @@ for (;;) code += PRIV(OP_lengths)[*code]; break; + case OP_CALLOUT_STR: + code += GET(code, 1 + 2*LINK_SIZE); + break; + default: return code; } @@ -2279,11 +2287,13 @@ for (;;) if (c == OP_END) return NULL; - /* XCLASS is used for classes that cannot be represented just by a bit - map. This includes negated single high-valued characters. The length in - the table is zero; the actual length is stored in the compiled code. */ + /* XCLASS is used for classes that cannot be represented just by a bit map. 
+ This includes negated single high-valued characters. CALLOUT_STR is used for + callouts with string arguments. In both cases the length in the table is + zero; the actual length is stored in the compiled code. */ if (c == OP_XCLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); /* Handle recursion */ @@ -2442,11 +2452,13 @@ for (;;) if (c == OP_END) return NULL; if (c == OP_RECURSE) return code; - /* XCLASS is used for classes that cannot be represented just by a bit - map. This includes negated single high-valued characters. The length in - the table is zero; the actual length is stored in the compiled code. */ + /* XCLASS is used for classes that cannot be represented just by a bit map. + This includes negated single high-valued characters. CALLOUT_STR is used for + callouts with string arguments. In both cases the length in the table is + zero; the actual length is stored in the compiled code. */ if (c == OP_XCLASS) code += GET(code, 1); + else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra @@ -5558,30 +5570,124 @@ for (;; ptr++) /* ------------------------------------------------------------ */ - case CHAR_C: /* Callout - may be followed by digits; */ + case CHAR_C: /* Callout */ previous_callout = code; /* Save for later completion */ after_manual_callout = 1; /* Skip one item before completing */ - *code++ = OP_CALLOUT; + ptr++; /* Character after (?C */ + + /* A callout may have a string argument, delimited by one of a fixed + number of characters, or an undelimited numerical argument, or no + argument, which is the same as (?C0). Different opcodes are used for + the two cases. 
*/ + + if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr)) + { + uint32_t delimiter = 0; + + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + { + if (*ptr == PRIV(callout_start_delims)[i]) + { + delimiter = PRIV(callout_end_delims)[i]; + break; + } + } + + if (delimiter == 0) + { + *errorcodeptr = ERR82; + goto FAILED; + } + + /* During the pre-compile phase, we parse the string and update the + length. There is no need to generate any code. */ + + if (lengthptr != NULL) /* Only check the string */ + { + PCRE2_SPTR start = ptr; + do + { + if (++ptr >= cb->end_pattern) + { + *errorcodeptr = ERR81; + ptr = start; /* To give a more useful message */ + goto FAILED; + } + if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2; + } + while (ptr[0] != delimiter); + + /* Start points to the opening delimiter, ptr points to the + closing delimiter. We must allow for including the delimiter and + for the terminating zero. Any doubled delimiters within the string + make this an overestimate, but it is not worth bothering about. */ + + (*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE); + } + + /* In the real compile we can copy the string, knowing that it is + syntactically OK. The starting delimiter is included so that the + client can discover it if they want. */ + + else + { + PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE); + *callout_string++ = *ptr++; + for(;;) + { + if (*ptr == delimiter) + { + if (ptr[1] == delimiter) ptr++; else break; + } + *callout_string++ = *ptr++; + } + *callout_string++ = CHAR_NULL; + code[0] = OP_CALLOUT_STR; + PUT(code, 1, (int)(ptr + 2 - cb->start_pattern)); /* Next offset */ + PUT(code, 1 + LINK_SIZE, 0); /* Default length */ + PUT(code, 1 + 2*LINK_SIZE, /* Compute size */ + (int)(callout_string - code)); + code = callout_string; + } + + /* Advance to what should be the closing parenthesis, which is + checked below. 
*/ + + ptr++; + } + + /* Handle a callout with an optional numerical argument, which must be + less than or equal to 255. A missing argument gives 0. */ + + else { int n = 0; - ptr++; - while(IS_DIGIT(*ptr)) + code[0] = OP_CALLOUT; /* Numerical callout */ + while (IS_DIGIT(*ptr)) + { n = n * 10 + *ptr++ - CHAR_0; - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR39; - goto FAILED; + if (n > 255) + { + *errorcodeptr = ERR38; + goto FAILED; + } } - if (n > 255) - { - *errorcodeptr = ERR38; - goto FAILED; - } - *code++ = n; - PUT(code, 0, (int)(ptr - cb->start_pattern + 1)); /* Pattern offset */ - PUT(code, LINK_SIZE, 0); /* Default length */ - code += 2 * LINK_SIZE; + PUT(code, 1, (int)(ptr - cb->start_pattern + 1)); /* Next offset */ + PUT(code, 1 + LINK_SIZE, 0); /* Default length */ + code[1 + 2*LINK_SIZE] = n; /* Callout number */ + code += PRIV(OP_lengths)[OP_CALLOUT]; } + + /* Both formats must have a closing parenthesis */ + + if (*ptr != CHAR_RIGHT_PARENTHESIS) + { + *errorcodeptr = ERR39; + goto FAILED; + } + + /* Callouts cannot be quantified. */ + previous = NULL; continue; @@ -7164,7 +7270,10 @@ do { if (op == OP_COND) { scode += 1 + LINK_SIZE; + if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; + else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE); + switch (*scode) { case OP_CREF: diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index ca57df9..be23fc8 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -161,6 +161,7 @@ static const uint8_t coptable[] = { 0, /* DNREFI */ 0, /* RECURSE */ 0, /* CALLOUT */ + 0, /* CALLOUT_STR */ 0, /* Alt */ 0, /* Ket */ 0, /* KetRmax */ @@ -233,6 +234,7 @@ static const uint8_t poptable[] = { 0, /* DNREFI */ 0, /* RECURSE */ 0, /* CALLOUT */ + 0, /* CALLOUT_STR */ 0, /* Alt */ 0, /* Ket */ 0, /* KetRmax */ @@ -2605,14 +2607,16 @@ for (;;) is inserted between OP_COND and an assertion condition. This does not happen for the other conditions. 
*/ - if (code[LINK_SIZE+1] == OP_CALLOUT) + if (code[LINK_SIZE + 1] == OP_CALLOUT + || code[LINK_SIZE + 1] == OP_CALLOUT_STR) { + unsigned int callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT) + ? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 2 + 3*LINK_SIZE); rrc = 0; if (mb->callout != NULL) { pcre2_callout_block cb; - cb.version = 0; - cb.callout_number = code[LINK_SIZE+2]; + cb.version = 1; cb.capture_top = 1; cb.capture_last = 0; cb.offset_vector = offsets; @@ -2621,13 +2625,28 @@ for (;;) cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject); cb.start_match = (PCRE2_SIZE)(current_subject - start_subject); cb.current_position = (PCRE2_SIZE)(ptr - start_subject); - cb.pattern_position = GET(code, LINK_SIZE + 3); - cb.next_item_length = GET(code, 3 + 2*LINK_SIZE); + cb.pattern_position = GET(code, LINK_SIZE + 2); + cb.next_item_length = GET(code, LINK_SIZE + 2 + LINK_SIZE); + + if (code[LINK_SIZE + 1] == OP_CALLOUT) + { + cb.callout_number = code[2 + 3*LINK_SIZE]; + cb.callout_string = NULL; + cb.callout_string_length = 0; + } + else + { + cb.callout_number = 0; + cb.callout_string = code + (2 + 4*LINK_SIZE) + 1; + cb.callout_string_length = + callout_length - (1 + 3*LINK_SIZE) - 2; + } + if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0) return rrc; /* Abandon */ } if (rrc > 0) break; /* Fail this thread */ - code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */ + code += callout_length; /* Skip callout data */ } condcode = code[LINK_SIZE+1]; @@ -2954,27 +2973,47 @@ for (;;) /* Handle callouts */ case OP_CALLOUT: - rrc = 0; - if (mb->callout != NULL) + case OP_CALLOUT_STR: { - pcre2_callout_block cb; - cb.version = 0; - cb.callout_number = code[1]; - cb.capture_top = 1; - cb.capture_last = 0; - cb.offset_vector = offsets; - cb.mark = NULL; /* No (*MARK) support */ - cb.subject = start_subject; - cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject); - cb.start_match = (PCRE2_SIZE)(current_subject - start_subject); - cb.current_position 
= (PCRE2_SIZE)(ptr - start_subject); - cb.pattern_position = GET(code, 2); - cb.next_item_length = GET(code, 2 + LINK_SIZE); - if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0) - return rrc; /* Abandon */ + unsigned int callout_length = (*code == OP_CALLOUT) + ? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 1 + 2*LINK_SIZE); + rrc = 0; + + if (mb->callout != NULL) + { + pcre2_callout_block cb; + cb.version = 1; + cb.capture_top = 1; + cb.capture_last = 0; + cb.offset_vector = offsets; + cb.mark = NULL; /* No (*MARK) support */ + cb.subject = start_subject; + cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject); + cb.start_match = (PCRE2_SIZE)(current_subject - start_subject); + cb.current_position = (PCRE2_SIZE)(ptr - start_subject); + cb.pattern_position = GET(code, 1); + cb.next_item_length = GET(code, 1 + LINK_SIZE); + + if (*code == OP_CALLOUT) + { + cb.callout_number = code[1 + 2*LINK_SIZE]; + cb.callout_string = NULL; + cb.callout_string_length = 0; + } + else + { + cb.callout_number = 0; + cb.callout_string = code + (1 + 3*LINK_SIZE) + 1; + cb.callout_string_length = + callout_length - (1 + 3*LINK_SIZE) - 2; + } + + if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0) + return rrc; /* Abandon */ + } + if (rrc == 0) + { ADD_ACTIVE(state_offset + callout_length, 0); } } - if (rrc == 0) - { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); } break; diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 07d92de..c67b5f5 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -161,6 +161,8 @@ static const char compile_error_texts[] = "syntax error in (?(VERSION condition\0" /* 80 */ "internal error: unknown opcode in auto_possessify()\0" + "missing terminating delimiter for callout with string argument\0" + "unrecognized string delimiter follows (?C\0" ; /* Match-time and UTF error texts are in the same format. 
*/ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index ee0a670..f288f39 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1477,84 +1477,85 @@ enum { OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */ OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */ OP_CALLOUT, /* 118 Call out to external function if provided */ + OP_CALLOUT_STR, /* 119 Call out with string argument */ - OP_ALT, /* 119 Start of alternation */ - OP_KET, /* 120 End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* 121 These two must remain together and in this */ - OP_KETRMIN, /* 122 order. They are for groups the repeat for ever. */ - OP_KETRPOS, /* 123 Possessive unlimited repeat. */ + OP_ALT, /* 120 Start of alternation */ + OP_KET, /* 121 End of group that doesn't have an unbounded repeat */ + OP_KETRMAX, /* 122 These two must remain together and in this */ + OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. */ + OP_KETRPOS, /* 124 Possessive unlimited repeat. */ /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four asserts must remain in order. */ - OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */ - OP_ASSERT, /* 125 Positive lookahead */ - OP_ASSERT_NOT, /* 126 Negative lookahead */ - OP_ASSERTBACK, /* 127 Positive lookbehind */ - OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */ + OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */ + OP_ASSERT, /* 126 Positive lookahead */ + OP_ASSERT_NOT, /* 127 Negative lookahead */ + OP_ASSERTBACK, /* 128 Positive lookbehind */ + OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */ /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. The POS versions must immediately follow the non-POS versions in each case. 
*/ - OP_ONCE, /* 129 Atomic group, contains captures */ - OP_ONCE_NC, /* 130 Atomic group containing no captures */ - OP_BRA, /* 131 Start of non-capturing bracket */ - OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */ - OP_CBRA, /* 133 Start of capturing bracket */ - OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */ - OP_COND, /* 135 Conditional group */ + OP_ONCE, /* 130 Atomic group, contains captures */ + OP_ONCE_NC, /* 131 Atomic group containing no captures */ + OP_BRA, /* 132 Start of non-capturing bracket */ + OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */ + OP_CBRA, /* 134 Start of capturing bracket */ + OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */ + OP_COND, /* 136 Conditional group */ /* These five must follow the previous five, in the same order. There's a check for >= SBRA to distinguish the two sets. */ - OP_SBRA, /* 136 Start of non-capturing bracket, check empty */ - OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */ - OP_SCBRA, /* 138 Start of capturing bracket, check empty */ - OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */ - OP_SCOND, /* 140 Conditional group, check empty */ + OP_SBRA, /* 137 Start of non-capturing bracket, check empty */ + OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */ + OP_SCBRA, /* 139 Start of capturing bracket, check empty */ + OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */ + OP_SCOND, /* 141 Conditional group, check empty */ /* The next two pairs must (respectively) be kept together. 
*/ - OP_CREF, /* 141 Used to hold a capture number as condition */ - OP_DNCREF, /* 142 Used to point to duplicate names as a condition */ - OP_RREF, /* 143 Used to hold a recursion number as condition */ - OP_DNRREF, /* 144 Used to point to duplicate names as a condition */ - OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */ - OP_TRUE, /* 146 Always true (used by VERSION) */ + OP_CREF, /* 142 Used to hold a capture number as condition */ + OP_DNCREF, /* 143 Used to point to duplicate names as a condition */ + OP_RREF, /* 144 Used to hold a recursion number as condition */ + OP_DNRREF, /* 145 Used to point to duplicate names as a condition */ + OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */ + OP_TRUE, /* 147 Always true (used by VERSION) */ - OP_BRAZERO, /* 147 These two must remain together and in this */ - OP_BRAMINZERO, /* 148 order. */ - OP_BRAPOSZERO, /* 149 */ + OP_BRAZERO, /* 148 These two must remain together and in this */ + OP_BRAMINZERO, /* 149 order. 
*/ + OP_BRAPOSZERO, /* 150 */ /* These are backtracking control verbs */ - OP_MARK, /* 150 always has an argument */ - OP_PRUNE, /* 151 */ - OP_PRUNE_ARG, /* 152 same, but with argument */ - OP_SKIP, /* 153 */ - OP_SKIP_ARG, /* 154 same, but with argument */ - OP_THEN, /* 155 */ - OP_THEN_ARG, /* 156 same, but with argument */ - OP_COMMIT, /* 157 */ + OP_MARK, /* 151 always has an argument */ + OP_PRUNE, /* 152 */ + OP_PRUNE_ARG, /* 153 same, but with argument */ + OP_SKIP, /* 154 */ + OP_SKIP_ARG, /* 155 same, but with argument */ + OP_THEN, /* 156 */ + OP_THEN_ARG, /* 157 same, but with argument */ + OP_COMMIT, /* 158 */ /* These are forced failure and success verbs */ - OP_FAIL, /* 158 */ - OP_ACCEPT, /* 159 */ - OP_ASSERT_ACCEPT, /* 160 Used inside assertions */ - OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */ + OP_FAIL, /* 159 */ + OP_ACCEPT, /* 160 */ + OP_ASSERT_ACCEPT, /* 161 Used inside assertions */ + OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ - OP_SKIPZERO, /* 162 */ + OP_SKIPZERO, /* 163 */ /* This is used to identify a DEFINE group during compilation so that it can be checked for having only one branch. It is changed to OP_FALSE before compilation finishes. */ - OP_DEFINE, /* 163 */ + OP_DEFINE, /* 164 */ /* This is not an opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors - there have been @@ -1598,7 +1599,7 @@ some cases doesn't actually use these names at all). */ "*", "*?", "+", "+?", "?", "??", "{", "{", \ "*+","++", "?+", "{", \ "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ - "Recurse", "Callout", \ + "Recurse", "Callout", "CalloutStr", \ "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ "Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \ "Once", "Once_NC", \ @@ -1672,7 +1673,8 @@ in UTF-8 mode. The code that uses this table must know about such things. 
*/ 1+2*IMM2_SIZE, /* DNREF */ \ 1+2*IMM2_SIZE, /* DNREFI */ \ 1+LINK_SIZE, /* RECURSE */ \ - 2+2*LINK_SIZE, /* CALLOUT */ \ + 1+2*LINK_SIZE+1, /* CALLOUT */ \ + 0, /* CALLOUT_STR - variable length */ \ 1+LINK_SIZE, /* Alt */ \ 1+LINK_SIZE, /* Ket */ \ 1+LINK_SIZE, /* KetRmax */ \ @@ -1806,6 +1808,8 @@ extern const uint8_t PRIV(utf8_table4)[]; #endif #define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_) +#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_) +#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_) #define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_) #define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_) #define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_) @@ -1824,6 +1828,8 @@ extern const uint8_t PRIV(utf8_table4)[]; #define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_) extern const uint8_t PRIV(OP_lengths)[]; +extern const uint32_t PRIV(callout_end_delims)[]; +extern const uint32_t PRIV(callout_start_delims)[]; extern const pcre2_compile_context PRIV(default_compile_context); extern const pcre2_match_context PRIV(default_match_context); extern const uint8_t PRIV(default_tables)[]; diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 651e273..c944698 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -771,6 +771,9 @@ switch(*cc) #endif return cc + 1; + case OP_CALLOUT_STR: + return cc + GET(cc, 1 + 2*LINK_SIZE); + #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: return cc + GET(cc, 1); @@ -821,7 +824,7 @@ while (cc < ccend) case OP_SCOND: /* Only AUTO_CALLOUT can insert this opcode. We do not intend to support this case. 
*/ - if (cc[1 + LINK_SIZE] == OP_CALLOUT) + if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR) return FALSE; cc += 1 + LINK_SIZE; break; @@ -855,12 +858,13 @@ while (cc < ccend) break; case OP_CALLOUT: + case OP_CALLOUT_STR: if (common->capture_last_ptr == 0) { common->capture_last_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } - cc += 2 + 2 * LINK_SIZE; + cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); break; case OP_THEN_ARG: @@ -6296,7 +6300,7 @@ uint32_t i; if (arguments->callout == NULL) return 0; -callout_block->version = 0; +callout_block->version = 1; /* Offsets in subject. */ callout_block->subject_length = arguments->end - arguments->begin; @@ -6333,6 +6337,10 @@ static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *com DEFINE_COMPILER; backtrack_common *backtrack; sljit_si mov_opcode; +unsigned int callout_length = (*cc == OP_CALLOUT) + ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE); +sljit_sw value1; +sljit_sw value2; PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); @@ -6341,7 +6349,8 @@ allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); SLJIT_ASSERT(common->capture_last_ptr != 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]); +value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0; +OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1); OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); /* These pointer sized fields temporarly stores internal variables. 
*/ @@ -6352,8 +6361,22 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0); if (common->mark_ptr != 0) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_UI : SLJIT_MOV; -OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2)); -OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE)); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1)); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE)); + +if (*cc == OP_CALLOUT) + { + value1 = 0; + value2 = 0; + } +else + { + value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1); + value2 = (callout_length - (1 + 3*LINK_SIZE + 2)); + } + +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); /* Needed to save important temporary registers. 
*/ @@ -6372,7 +6395,7 @@ if (common->forced_quit_label == NULL) add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS)); else JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label); -return cc + 2 + 2 * LINK_SIZE; +return cc + callout_length; } #undef CALLOUT_ARG_SIZE @@ -8377,6 +8400,7 @@ while (cc < ccend) break; case OP_CALLOUT: + case OP_CALLOUT_STR: cc = compile_callout_matchingpath(common, cc, parent); break; @@ -9561,6 +9585,7 @@ while (current) break; case OP_CALLOUT: + case OP_CALLOUT_STR: case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: diff --git a/src/pcre2_match.c b/src/pcre2_match.c index ba47929..119a8f6 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -1310,13 +1310,15 @@ for (;;) /* Because of the way auto-callout works during compile, a callout item is inserted between OP_COND and an assertion condition. */ - if (*ecode == OP_CALLOUT) + if (*ecode == OP_CALLOUT || *ecode == OP_CALLOUT_STR) { + unsigned int callout_length = (*ecode == OP_CALLOUT) + ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE); + if (mb->callout != NULL) { pcre2_callout_block cb; - cb.version = 0; - cb.callout_number = ecode[1]; + cb.version = 1; cb.capture_top = offset_top/2; cb.capture_last = mb->capture_last & CAPLMASK; cb.offset_vector = mb->ovector; @@ -1325,8 +1327,23 @@ for (;;) cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject); cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject); cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject); - cb.pattern_position = GET(ecode, 2); - cb.next_item_length = GET(ecode, 2 + LINK_SIZE); + cb.pattern_position = GET(ecode, 1); + cb.next_item_length = GET(ecode, 1 + LINK_SIZE); + + if (*ecode == OP_CALLOUT) + { + cb.callout_number = ecode[1 + 2*LINK_SIZE]; + cb.callout_string = NULL; + cb.callout_string_length = 0; + } + else + { + cb.callout_number = 0; + cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1; + cb.callout_string_length = + callout_length - (1 + 3*LINK_SIZE) - 2; + } 
+ if ((rrc = mb->callout(&cb, mb->callout_data)) > 0) RRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); @@ -1335,8 +1352,8 @@ for (;;) /* Advance ecode past the callout, so it now points to the condition. We must adjust codelink so that the value of ecode+codelink is unchanged. */ - ecode += PRIV(OP_lengths)[OP_CALLOUT]; - codelink -= PRIV(OP_lengths)[OP_CALLOUT]; + ecode += callout_length; + codelink -= callout_length; } /* Test the various possible conditions */ @@ -1716,26 +1733,47 @@ for (;;) function is able to force a failure. */ case OP_CALLOUT: - if (mb->callout != NULL) + case OP_CALLOUT_STR: { - pcre2_callout_block cb; - cb.version = 0; - cb.callout_number = ecode[1]; - cb.capture_top = offset_top/2; - cb.capture_last = mb->capture_last & CAPLMASK; - cb.offset_vector = mb->ovector; - cb.mark = mb->nomatch_mark; - cb.subject = mb->start_subject; - cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject); - cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject); - cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject); - cb.pattern_position = GET(ecode, 2); - cb.next_item_length = GET(ecode, 2 + LINK_SIZE); - if ((rrc = mb->callout(&cb, mb->callout_data)) > 0) - RRETURN(MATCH_NOMATCH); - if (rrc < 0) RRETURN(rrc); + unsigned int callout_length = (*ecode == OP_CALLOUT) + ? 
PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE); + + if (mb->callout != NULL) + { + pcre2_callout_block cb; + cb.version = 1; + cb.callout_number = ecode[LINK_SIZE + 1]; + cb.capture_top = offset_top/2; + cb.capture_last = mb->capture_last & CAPLMASK; + cb.offset_vector = mb->ovector; + cb.mark = mb->nomatch_mark; + cb.subject = mb->start_subject; + cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject); + cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject); + cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject); + cb.pattern_position = GET(ecode, 1); + cb.next_item_length = GET(ecode, 1 + LINK_SIZE); + + if (*ecode == OP_CALLOUT) + { + cb.callout_number = ecode[1 + 2*LINK_SIZE]; + cb.callout_string = NULL; + cb.callout_string_length = 0; + } + else + { + cb.callout_number = 0; + cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1; + cb.callout_string_length = + callout_length - (1 + 3*LINK_SIZE) - 2; + } + + if ((rrc = mb->callout(&cb, mb->callout_data)) > 0) + RRETURN(MATCH_NOMATCH); + if (rrc < 0) RRETURN(rrc); + } + ecode += callout_length; } - ecode += 2 + 2*LINK_SIZE; break; /* Recursion either matches the current regex, or some subexpression. 
The diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index 4c2dd32..0465359 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -305,6 +305,7 @@ for(;;) { PCRE2_SPTR ccode; uint32_t c; + int i; const char *flag = " "; unsigned int extra = 0; @@ -594,8 +595,23 @@ for(;;) goto CLASS_REF_REPEAT; case OP_CALLOUT: - fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2), - GET(code, 2 + LINK_SIZE)); + fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE], + GET(code, 1), GET(code, 1 + LINK_SIZE)); + break; + + case OP_CALLOUT_STR: + c = code[1 + 3*LINK_SIZE]; + fprintf(f, " %s %c", OP_names[*code], c); + extra = GET(code, 1 + 2*LINK_SIZE); + print_custring(f, code + 2 + 3*LINK_SIZE); + + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + if (c == PRIV(callout_start_delims)[i]) + { + c = PRIV(callout_end_delims)[i]; + break; + } + fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE)); break; case OP_PROP: @@ -611,7 +627,6 @@ for(;;) case OP_NCLASS: case OP_XCLASS: { - int i; unsigned int min, max; BOOL printmap; BOOL invertmap = FALSE; diff --git a/src/pcre2_study.c b/src/pcre2_study.c index 3f93a12..b476a64 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -199,6 +199,10 @@ for (;;) cc += PRIV(OP_lengths)[*cc]; break; + case OP_CALLOUT_STR: + cc += GET(cc, 1 + 2*LINK_SIZE); + break; + /* Skip over a subpattern that has a {0} or {0,x} quantifier */ case OP_BRAZERO: @@ -935,7 +939,11 @@ do /* Skip over callout */ case OP_CALLOUT: - tcode += 2 + 2*LINK_SIZE; + tcode += PRIV(OP_lengths)[OP_CALLOUT]; + break; + + case OP_CALLOUT_STR: + tcode += GET(tcode, 1 + 2*LINK_SIZE); break; /* Skip over lookbehind and negative lookahead assertions */ diff --git a/src/pcre2_tables.c b/src/pcre2_tables.c index af1c7f0..43de2ec 100644 --- a/src/pcre2_tables.c +++ b/src/pcre2_tables.c @@ -66,6 +66,20 @@ adding to classes. 
*/ const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST }; const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST }; +/* These tables are the pairs of delimiters that are valid for callout string +arguments. For each starting delimiter there must be a matching ending +delimiter, which in fact is different only for bracket-like delimiters. */ + +const uint32_t PRIV(callout_start_delims)[] = { + CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK, + CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN, + CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 }; + +const uint32_t PRIV(callout_end_delims)[] = { + CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK, + CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN, + CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 }; + /************************************************* * Tables for UTF-8 support * diff --git a/src/pcre2test.c b/src/pcre2test.c index 85f80ca..0e3bac1 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -4519,9 +4519,9 @@ return capcount; /* Called from a PCRE2 library as a result of the (?C) item. We print out where we are in the match. Yield zero unless more callouts than the fail count, or the callout data is not zero. The only differences in the callout block for -different code unit widths are that the pointers to the subject and the most -recent MARK point to strings of the appropriate width. Casts can be used to -deal with this. +different code unit widths are that the pointers to the subject, the most +recent MARK, and a callout argument string point to strings of the appropriate +width. Casts can be used to deal with this. Argument: a pointer to a callout block Return: @@ -4535,11 +4535,31 @@ BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0; FILE *f = (first_callout || callout_capture)? 
outfile : NULL; +/* For a callout with a string argument, show the string first, as it does not +fit tidily in the rest of the data. Use outfile because f may be NULL here. */ + +if (cb->callout_string != NULL) + { + uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); + fprintf(outfile, "Callout: %c", delimiter); + PCHARSV(cb->callout_string, 0, + cb->callout_string_length, utf, outfile); + for (i = 0; callout_start_delims[i] != 0; i++) + if (delimiter == callout_start_delims[i]) + { + delimiter = callout_end_delims[i]; + break; + } + fprintf(outfile, "%c", delimiter); + if (!callout_capture) fprintf(outfile, "\n"); + } + +/* Show captured strings if required */ + if (callout_capture) { - fprintf(f, "Callout %d: last capture = %d\n", - cb->callout_number, cb->capture_last); - + if (cb->callout_string == NULL) fprintf(f, "Callout %d:", cb->callout_number); + fprintf(f, " last capture = %d\n", cb->capture_last); for (i = 0; i < cb->capture_top * 2; i += 2) { fprintf(f, "%2d: ", i/2); @@ -4553,7 +4573,7 @@ if (callout_capture) fprintf(f, "\n"); } } - + /* Re-print the subject in canonical form, the first time or if giving full datails. On subsequent calls in the same match, we use pchars just to find the printed lengths of the substrings. */ @@ -4572,19 +4592,22 @@ PCHARSV(cb->subject, cb->current_position, if (f != NULL) fprintf(f, "\n"); -/* Always print appropriate indicators, with callout number if not already -shown. For automatic callouts, show the pattern offset. */ +/* For automatic callouts, show the pattern offset. Otherwise, for a numerical +callout whose number has not already been shown with captured strings, show the +number here. A callout with a string argument has been displayed above. 
*/ if (cb->callout_number == 255) { fprintf(outfile, "%+3d ", (int)cb->pattern_position); if (cb->pattern_position > 99) fprintf(outfile, "\n "); } -else +else { - if (callout_capture) fprintf(outfile, " "); + if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " "); else fprintf(outfile, "%3d ", cb->callout_number); } + +/* Now show position indicators */ for (i = 0; i < pre_start; i++) fprintf(outfile, " "); fprintf(outfile, "^"); diff --git a/testdata/testinput2 b/testdata/testinput2 index b264415..11f6dd1 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4178,4 +4178,32 @@ a random value. /Ix /((?+1)(\1))/B +# Callouts with string arguments + +/a(?C"/ + +/a(?C"a/ + +/a(?C"a"/ + +/a(?C"a"bcde(?C"b")xyz/ + +/a(?C"a)b""c")/B + +/ab(?C" any text with spaces ")cde/B + abcde + 12abcde + +/^a(b)c(?C1)def/ + abcdef + +/^a(b)c(?C"AB")def/ + abcdef + +/^a(b)c(?C1)def/ + abcdef\=callout_capture + +/^a(b)c(?C{AB})def/B + abcdef\=callout_capture + # End of testinput2 diff --git a/testdata/testinput6 b/testdata/testinput6 index 56495d1..60c69b7 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4811,4 +4811,20 @@ /a(b)c(d)/ abc\=ph,copy=0,copy=1,getall +/ab(?C" any text with spaces ")cde/B + abcde + 12abcde + +/^a(b)c(?C1)def/ + abcdef + +/^a(b)c(?C"AB")def/ + abcdef + +/^a(b)c(?C1)def/ + abcdef\=callout_capture + +/^a(b)c(?C{AB})def/B + abcdef\=callout_capture + # End of testinput6 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index ba7c05f..552f953 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -3538,7 +3538,7 @@ Subject length lower bound = 2 Failed: error 138 at offset 6: number after (?C is greater than 255 /(?Cab)xx/I -Failed: error 139 at offset 3: closing parenthesis for (?C expected +Failed: error 182 at offset 3: unrecognized string delimiter follows (?C /(?C12vr)x/I Failed: error 139 at offset 5: closing parenthesis for (?C expected @@ -13969,4 +13969,95 @@ Matched, but too many substrings End 
------------------------------------------------------------------ +# Callouts with string arguments + +/a(?C"/ +Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument + +/a(?C"a/ +Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument + +/a(?C"a"/ +Failed: error 139 at offset 7: closing parenthesis for (?C expected + +/a(?C"a"bcde(?C"b")xyz/ +Failed: error 139 at offset 7: closing parenthesis for (?C expected + +/a(?C"a)b""c")/B +------------------------------------------------------------------ + Bra + a + CalloutStr "a)b"c" 13 0 + Ket + End +------------------------------------------------------------------ + +/ab(?C" any text with spaces ")cde/B +------------------------------------------------------------------ + Bra + ab + CalloutStr " any text with spaces " 30 1 + cde + Ket + End +------------------------------------------------------------------ + abcde +Callout: " any text with spaces " +--->abcde + ^ ^ c + 0: abcde + 12abcde +Callout: " any text with spaces " +--->12abcde + ^ ^ c + 0: abcde + +/^a(b)c(?C1)def/ + abcdef +--->abcdef + 1 ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C"AB")def/ + abcdef +Callout: "AB" +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C1)def/ + abcdef\=callout_capture +Callout 1: last capture = 1 + 0: + 1: b +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C{AB})def/B +------------------------------------------------------------------ + Bra + ^ + a + CBra 1 + b + Ket + c + CalloutStr {AB} 14 1 + def + Ket + End +------------------------------------------------------------------ + abcdef\=callout_capture +Callout: {AB} last capture = 1 + 0: + 1: b +--->abcdef + ^ ^ d + 0: abcdef + 1: b + # End of testinput2 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index ef3cdcd..98054e1 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7773,4 +7773,66 @@ Partial match: abc Copy substring 1 failed (-2): partial match get substring list failed 
(-2): partial match +/ab(?C" any text with spaces ")cde/B +------------------------------------------------------------------ + Bra + ab + CalloutStr " any text with spaces " 30 1 + cde + Ket + End +------------------------------------------------------------------ + abcde +Callout: " any text with spaces " +--->abcde + ^ ^ c + 0: abcde + 12abcde +Callout: " any text with spaces " +--->12abcde + ^ ^ c + 0: abcde + +/^a(b)c(?C1)def/ + abcdef +--->abcdef + 1 ^ ^ d + 0: abcdef + +/^a(b)c(?C"AB")def/ + abcdef +Callout: "AB" +--->abcdef + ^ ^ d + 0: abcdef + +/^a(b)c(?C1)def/ + abcdef\=callout_capture +Callout 1: last capture = 0 + 0: +--->abcdef + ^ ^ d + 0: abcdef + +/^a(b)c(?C{AB})def/B +------------------------------------------------------------------ + Bra + ^ + a + CBra 1 + b + Ket + c + CalloutStr {AB} 14 1 + def + Ket + End +------------------------------------------------------------------ + abcdef\=callout_capture +Callout: {AB} last capture = 0 + 0: +--->abcdef + ^ ^ d + 0: abcdef + # End of testinput6