From 78aff8c80fb20f78dcbc43b1771daf966ad8c7e2 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sat, 6 Feb 2016 16:40:59 +0000 Subject: [PATCH] Fix pcre2test loop when a callout is in an initial lookbehind. --- ChangeLog | 4 ++++ doc/pcre2test.1 | 8 +++++--- src/pcre2test.c | 38 ++++++++++++++++++++++++++++---------- testdata/testinput2 | 5 +++++ testdata/testoutput2 | 15 +++++++++++++++ 5 files changed, 57 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2cfa580..3ce0207 100644 --- a/ChangeLog +++ b/ChangeLog @@ -54,6 +54,10 @@ are not, an error message is output and the pcre2test run is abandoned. The message points out the possibility of a mis-linking. Hopefully this will avoid some head-scratching the next time this happens. +11. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind +assertion, caused pcre2test to output a very large number of spaces when the +callout was taken, making the program appear to loop. + Version 10.21 12-January-2016 ----------------------------- diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 47be7f6..f6454f6 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "31 January 2016" "PCRE 10.22" +.TH PCRE2TEST 1 "06 February 2016" "PCRE 10.22" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -1492,7 +1492,9 @@ item to be tested. For example: This output indicates that callout number 0 occurred for a match attempt starting at the fourth character of the subject string, when the pointer was at the seventh character, and when the next pattern item was \ed. Just -one circumflex is output if the start and current positions are the same. +one circumflex is output if the start and current positions are the same, or if +the current position precedes the start position, which can happen if the +callout is in a lookbehind assertion. 
.P Callouts numbered 255 are assumed to be automatic callouts, inserted as a result of the \fB/auto_callout\fP pattern modifier. In this case, instead of @@ -1657,6 +1659,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 31 January 2016 +Last updated: 06 February 2016 Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/src/pcre2test.c b/src/pcre2test.c index 6bf286e..da0ccfb 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -2548,12 +2548,13 @@ return (int)(pp - p); /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. For printing *MARK strings, a negative length is given. If handed a NULL file, -just counts chars without printing. */ +just counts chars without printing (because pchar() does that). */ static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f) { uint32_t c = 0; int yield = 0; + if (length < 0) length = p[-1]; while (length-- > 0) { @@ -2571,6 +2572,7 @@ while (length-- > 0) c = *p++; yield += pchar(c, utf, f); } + return yield; } #endif @@ -5052,6 +5054,7 @@ static int callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr) { uint32_t i, pre_start, post_start, subject_length; +PCRE2_SIZE current_position; BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0; @@ -5102,22 +5105,37 @@ if (callout_capture) } } -/* Re-print the subject in canonical form, the first time or if giving full -datails. On subsequent calls in the same match, we use pchars just to find the -printed lengths of the substrings. */ +/* Re-print the subject in canonical form (with escapes for non-printing +characters), the first time, or if giving full details. On subsequent calls in +the same match, we use PCHARS() just to find the printed lengths of the +substrings. */ if (f != NULL) fprintf(f, "--->"); +/* The subject before the match start. 
*/ + PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f); +/* If a lookbehind is involved, the current position may be earlier than the +match start. If so, use the match start instead. */ + +current_position = (cb->current_position >= cb->start_match)? + cb->current_position : cb->start_match; + +/* The subject between the match start and the current position. */ + PCHARS(post_start, cb->subject, cb->start_match, - cb->current_position - cb->start_match, utf, f); + current_position - cb->start_match, utf, f); + +/* Print from the current position to the end. */ + +PCHARSV(cb->subject, current_position, cb->subject_length - current_position, + utf, f); + +/* Calculate the total subject printed length (no print). */ PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); -PCHARSV(cb->subject, cb->current_position, - cb->subject_length - cb->current_position, utf, f); - if (f != NULL) fprintf(f, "\n"); /* For automatic callouts, show the pattern offset. Otherwise, for a numerical @@ -7098,7 +7116,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) struct rlimit rlim; if (U32OVERFLOW(uli)) { - fprintf(stderr, "+++ Argument for -S is too big\n"); + fprintf(stderr, "** Argument for -S is too big\n"); exit(1); } stack_size = (uint32_t)uli; @@ -7150,7 +7168,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) { if (U32OVERFLOW(uli)) { - fprintf(stderr, "+++ Argument for %s is too big\n", arg); + fprintf(stderr, "** Argument for %s is too big\n", arg); exit(1); } timeitm = (int)uli; diff --git a/testdata/testinput2 b/testdata/testinput2 index 51a1bfa..b0882c9 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4806,4 +4806,9 @@ a)"xI /(?J)(?'a'))(?'a')/ +/(?<=((?C)0))/ + 9010 +\= Expect no match + abc + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index ce116fc..bd7eac9 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15168,4 +15168,19 @@ MK: A\x00b /(?J)(?'a'))(?'a')/ 
Failed: error 122 at offset 10: unmatched closing parenthesis +/(?<=((?C)0))/ + 9010 +--->9010 + 0 ^ 0 + 0 ^ 0 + 0: + 1: 0 +\= Expect no match + abc +--->abc + 0 ^ 0 + 0 ^ 0 + 0 ^ 0 +No match + # End of testinput2