Fix pcre2test loop when a callout is in an initial lookbehind.

This commit is contained in:
Philip.Hazel 2016-02-06 16:40:59 +00:00
parent 112d5b77cc
commit 78aff8c80f
5 changed files with 57 additions and 13 deletions

View File

@ -54,6 +54,10 @@ are not, an error message is output and the pcre2test run is abandoned. The
message points out the possibility of a mis-linking. Hopefully this will avoid message points out the possibility of a mis-linking. Hopefully this will avoid
some head-scratching the next time this happens. some head-scratching the next time this happens.
11. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind
assertion, caused pcre2test to output a very large number of spaces when the
callout was taken, making the program appear to loop.
Version 10.21 12-January-2016 Version 10.21 12-January-2016
----------------------------- -----------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "31 January 2016" "PCRE 10.22" .TH PCRE2TEST 1 "06 February 2016" "PCRE 10.22"
.SH NAME .SH NAME
pcre2test - a program for testing Perl-compatible regular expressions. pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -1492,7 +1492,9 @@ item to be tested. For example:
This output indicates that callout number 0 occurred for a match attempt This output indicates that callout number 0 occurred for a match attempt
starting at the fourth character of the subject string, when the pointer was at starting at the fourth character of the subject string, when the pointer was at
the seventh character, and when the next pattern item was \ed. Just the seventh character, and when the next pattern item was \ed. Just
one circumflex is output if the start and current positions are the same. one circumflex is output if the start and current positions are the same, or if
the current position precedes the start position, which can happen if the
callout is in a lookbehind assertion.
.P .P
Callouts numbered 255 are assumed to be automatic callouts, inserted as a Callouts numbered 255 are assumed to be automatic callouts, inserted as a
result of the \fB/auto_callout\fP pattern modifier. In this case, instead of result of the \fB/auto_callout\fP pattern modifier. In this case, instead of
@ -1657,6 +1659,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 31 January 2016 Last updated: 06 February 2016
Copyright (c) 1997-2016 University of Cambridge. Copyright (c) 1997-2016 University of Cambridge.
.fi .fi

View File

@ -2548,12 +2548,13 @@ return (int)(pp - p);
/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
For printing *MARK strings, a negative length is given. If handed a NULL file, For printing *MARK strings, a negative length is given. If handed a NULL file,
just counts chars without printing. */ just counts chars without printing (because pchar() does that). */
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f) static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{ {
uint32_t c = 0; uint32_t c = 0;
int yield = 0; int yield = 0;
if (length < 0) length = p[-1]; if (length < 0) length = p[-1];
while (length-- > 0) while (length-- > 0)
{ {
@ -2571,6 +2572,7 @@ while (length-- > 0)
c = *p++; c = *p++;
yield += pchar(c, utf, f); yield += pchar(c, utf, f);
} }
return yield; return yield;
} }
#endif #endif
@ -5052,6 +5054,7 @@ static int
callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr) callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
{ {
uint32_t i, pre_start, post_start, subject_length; uint32_t i, pre_start, post_start, subject_length;
PCRE2_SIZE current_position;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0; BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
@ -5102,22 +5105,37 @@ if (callout_capture)
} }
} }
/* Re-print the subject in canonical form, the first time or if giving full /* Re-print the subject in canonical form (with escapes for non-printing
datails. On subsequent calls in the same match, we use pchars just to find the characters), the first time, or if giving full details. On subsequent calls in
printed lengths of the substrings. */ the same match, we use PCHARS() just to find the printed lengths of the
substrings. */
if (f != NULL) fprintf(f, "--->"); if (f != NULL) fprintf(f, "--->");
/* The subject before the match start. */
PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f); PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
/* If a lookbehind is involved, the current position may be earlier than the
match start. If so, use the match start instead. */
current_position = (cb->current_position >= cb->start_match)?
cb->current_position : cb->start_match;
/* The subject between the match start and the current position. */
PCHARS(post_start, cb->subject, cb->start_match, PCHARS(post_start, cb->subject, cb->start_match,
cb->current_position - cb->start_match, utf, f); current_position - cb->start_match, utf, f);
/* Print from the current position to the end. */
PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
utf, f);
/* Calculate the total subject printed length (no print). */
PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
PCHARSV(cb->subject, cb->current_position,
cb->subject_length - cb->current_position, utf, f);
if (f != NULL) fprintf(f, "\n"); if (f != NULL) fprintf(f, "\n");
/* For automatic callouts, show the pattern offset. Otherwise, for a numerical /* For automatic callouts, show the pattern offset. Otherwise, for a numerical
@ -7098,7 +7116,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
struct rlimit rlim; struct rlimit rlim;
if (U32OVERFLOW(uli)) if (U32OVERFLOW(uli))
{ {
fprintf(stderr, "+++ Argument for -S is too big\n"); fprintf(stderr, "** Argument for -S is too big\n");
exit(1); exit(1);
} }
stack_size = (uint32_t)uli; stack_size = (uint32_t)uli;
@ -7150,7 +7168,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
{ {
if (U32OVERFLOW(uli)) if (U32OVERFLOW(uli))
{ {
fprintf(stderr, "+++ Argument for %s is too big\n", arg); fprintf(stderr, "** Argument for %s is too big\n", arg);
exit(1); exit(1);
} }
timeitm = (int)uli; timeitm = (int)uli;

5
testdata/testinput2 vendored
View File

@ -4806,4 +4806,9 @@ a)"xI
/(?J)(?'a'))(?'a')/ /(?J)(?'a'))(?'a')/
/(?<=((?C)0))/
9010
\= Expect no match
abc
# End of testinput2 # End of testinput2

15
testdata/testoutput2 vendored
View File

@ -15168,4 +15168,19 @@ MK: A\x00b
/(?J)(?'a'))(?'a')/ /(?J)(?'a'))(?'a')/
Failed: error 122 at offset 10: unmatched closing parenthesis Failed: error 122 at offset 10: unmatched closing parenthesis
/(?<=((?C)0))/
9010
--->9010
0 ^ 0
0 ^ 0
0:
1: 0
\= Expect no match
abc
--->abc
0 ^ 0
0 ^ 0
0 ^ 0
No match
# End of testinput2 # End of testinput2