diff --git a/ChangeLog b/ChangeLog index 0869658..3eb4fe1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -266,6 +266,9 @@ auto_callout". pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide characters to match (for example, /[\s[:^ascii:]]/). +40. The callout_error modifier has been added to pcre2test to make it possible +to return PCRE2_ERROR_CALLOUT from a callout. + Version 10.22 29-July-2016 -------------------------- diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 9eb1813..2343519 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "23 December 2016" "PCRE 10.23" +.TH PCRE2TEST 1 "28 December 2016" "PCRE 10.23" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -1040,6 +1040,7 @@ pattern. altglobal alternative global matching callout_capture show captures at callout time callout_data=<n> set a value to pass via callouts + callout_error=<n>[:<m>] control callout error callout_fail=<n>[:<m>] control callout failure callout_none do not supply a callout function copy=<number/name> copy captured substring @@ -1133,14 +1134,20 @@ does no capturing); it is ignored, with a warning message, if present. .sp A callout function is supplied when \fBpcre2test\fP calls the library matching functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is -set, the current captured groups are output when a callout occurs. +set, the current captured groups are output when a callout occurs. The default +return from the callout function is zero, which allows matching to continue. .P The \fBcallout_fail\fP modifier can be given one or two numbers. If there is -only one number, 1 is returned instead of 0 when a callout of that number is -reached. If two numbers are given, 1 is returned when callout <n> is reached -for the <m>th time. Note that callouts with string arguments are always given -the number zero. See "Callouts" below for a description of the output when a -callout it taken. 
+only one number, 1 is returned instead of 0 (causing matching to backtrack) +when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1 +is returned when callout <n> is reached and there have been at least <m> +callouts. The \fBcallout_error\fP modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +\fBcallout_error\fP takes precedence. +.P +Note that callouts with string arguments are always given the number zero. See +"Callouts" below for a description of the output when a callout is taken. .P The \fBcallout_data\fP modifier can be given an unsigned or a negative number. This is set as the "user data" that is passed to the matching function, and @@ -1751,6 +1758,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 23 December 2016 +Last updated: 28 December 2016 Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index e228a83..59af5d9 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -32,6 +32,17 @@ Written by Philip Hazel, October 2016 PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \ PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT) +/* This is the callout function. Its only purpose is to halt matching if there +are more than 100 callouts, as one way of stopping too much time being spent on +fruitless matches. The callout data is a pointer to the counter. */ + +static int callout_function(pcre2_callout_block *cb, void *callout_data) +{ +(void)cb; /* Avoid unused parameter warning */ +*((uint32_t *)callout_data) += 1; +return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0; +} + /* Putting in this apparently unnecessary prototype prevents gcc from giving a "no previous prototype" warning when compiling at high warning level. */ @@ -77,6 +88,7 @@ likewise do the match with and without the options. 
*/ for (i = 0; i < 2; i++) { + uint32_t callout_count; int errorcode; PCRE2_SIZE erroroffset; pcre2_code *code; @@ -147,8 +159,9 @@ for (i = 0; i < 2; i++) #endif return 0; } - pcre2_set_match_limit(match_context, 100); - pcre2_set_recursion_limit(match_context, 100); + (void)pcre2_set_match_limit(match_context, 100); + (void)pcre2_set_recursion_limit(match_context, 100); + (void)pcre2_set_callout(match_context, callout_function, &callout_count); } /* Match twice, with and without options */ @@ -168,6 +181,7 @@ for (i = 0; i < 2; i++) ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : ""); #endif + callout_count = 0; errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)size, 0, match_options, match_data, match_context); diff --git a/src/pcre2test.c b/src/pcre2test.c index bd0f372..d747337 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -175,7 +175,7 @@ void vms_setsymbol( char *, char *, int ); #endif #endif -#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */ +#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */ #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ #define DEFAULT_OVECCOUNT 15 /* Default ovector count */ #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ @@ -429,7 +429,7 @@ so many of them that they are split into two fields. */ #define CTL_POSIX_NOSUB 0x00800000u #define CTL_PUSH 0x01000000u /* These three must be */ #define CTL_PUSHCOPY 0x02000000u /* all in the same */ -#define CTL_PUSHTABLESCOPY 0x04000000u /* word. */ +#define CTL_PUSHTABLESCOPY 0x04000000u /* word. */ #define CTL_STARTCHAR 0x08000000u #define CTL_USE_LENGTH 0x10000000u /* Same word as HEXPAT */ #define CTL_UTF8_INPUT 0x20000000u @@ -495,6 +495,7 @@ typedef struct datctl { /* Structure for data line modifiers. 
*/ uint32_t control; /* Must be in same position as patctl */ uint32_t control2; /* Must be in same position as patctl */ uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ + uint32_t cerror[2]; uint32_t cfail[2]; int32_t callout_data; int32_t copy_numbers[MAXCPYGET]; @@ -549,6 +550,7 @@ static modstruct modlist[] = { { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) }, + { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) }, { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) }, { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, @@ -5229,7 +5231,7 @@ if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code); } else - { + { PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++], compiled_code); } } @@ -5459,8 +5461,17 @@ if (callout_data_ptr != NULL) } } -return (cb->callout_number != dat_datctl.cfail[0])? 0 : - (++callout_count >= dat_datctl.cfail[1])? 
1 : 0; +callout_count++; + +if (cb->callout_number == dat_datctl.cerror[0] && + callout_count >= dat_datctl.cerror[1]) + return PCRE2_ERROR_CALLOUT; + +if (cb->callout_number == dat_datctl.cfail[0] && + callout_count >= dat_datctl.cfail[1]) + return 1; + +return 0; } @@ -6123,7 +6134,9 @@ if ((pat_patctl.control & CTL_POSIX) != 0) regmatch_t *pmatch = NULL; const char *msg = "** Ignored with POSIX interface:"; - if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET) + if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET) + prmsg(&msg, "callout_error"); + if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET) prmsg(&msg, "callout_fail"); if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0) prmsg(&msg, "copy"); @@ -7347,7 +7360,8 @@ memset(&def_datctl, 0, sizeof(datctl)); def_datctl.oveccount = DEFAULT_OVECCOUNT; def_datctl.copy_numbers[0] = -1; def_datctl.get_numbers[0] = -1; -def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET; +def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET; +def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET; /* Scan command line options. 
*/ diff --git a/testdata/testinput2 b/testdata/testinput2 index bd13a85..f711fbe 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -1062,8 +1062,8 @@ /(?C0)(abc(?C1))*/I abcabcabc - abcabc\=callout_fail=1:3 - abcabcabc\=callout_fail=1:3 + abcabc\=callout_fail=1:4 + abcabcabc\=callout_fail=1:4 /(\d{3}(?C))*/I 123\=callout_capture @@ -4956,4 +4956,7 @@ a)"xI /((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +/abcd/auto_callout + abcd\=callout_error=255:2 + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 57bf862..d23840a 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -3550,14 +3550,14 @@ Subject length lower bound = 0 1 ^ ^ )* 0: abcabcabc 1: abc - abcabc\=callout_fail=1:3 + abcabc\=callout_fail=1:4 --->abcabc 0 ^ ( 1 ^ ^ )* 1 ^ ^ )* 0: abcabc 1: abc - abcabcabc\=callout_fail=1:3 + abcabcabc\=callout_fail=1:4 --->abcabcabc 0 ^ ( 1 ^ ^ )* @@ -15439,6 +15439,13 @@ Failed: error 128 at offset 11: assertion expected after (?( or (?(?C) /((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +/abcd/auto_callout + abcd\=callout_error=255:2 +--->abcd + +0 ^ a + +1 ^^ b +Failed: error -37: callout error code + # End of testinput2 Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data