Add callout_error to pcre2test and stop the fuzzer after 100 callouts.

This commit is contained in:
Philip.Hazel 2016-12-28 15:05:48 +00:00
parent f676362977
commit 62659dd2f3
6 changed files with 69 additions and 21 deletions

View File

@ -266,6 +266,9 @@ auto_callout".
pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide
characters to match (for example, /[\s[:^ascii:]]/).
40. The callout_error modifier has been added to pcre2test to make it possible
to return PCRE2_ERROR_CALLOUT from a callout.
Version 10.22 29-July-2016
--------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "23 December 2016" "PCRE 10.23"
.TH PCRE2TEST 1 "28 December 2016" "PCRE 10.23"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -1040,6 +1040,7 @@ pattern.
altglobal alternative global matching
callout_capture show captures at callout time
callout_data=<n> set a value to pass via callouts
callout_error=<n>[:<m>] control callout error
callout_fail=<n>[:<m>] control callout failure
callout_none do not supply a callout function
copy=<number or name> copy captured substring
@ -1133,14 +1134,20 @@ does no capturing); it is ignored, with a warning message, if present.
.sp
A callout function is supplied when \fBpcre2test\fP calls the library matching
functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is
set, the current captured groups are output when a callout occurs.
set, the current captured groups are output when a callout occurs. The default
return from the callout function is zero, which allows matching to continue.
.P
The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
only one number, 1 is returned instead of 0 when a callout of that number is
reached. If two numbers are given, 1 is returned when callout <n> is reached
for the <m>th time. Note that callouts with string arguments are always given
the number zero. See "Callouts" below for a description of the output when a
callout is taken.
only one number, 1 is returned instead of 0 (causing matching to backtrack)
when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
is returned when callout <n> is reached and there have been at least <m>
callouts. The \fBcallout_error\fP modifier is similar, except that
PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be
aborted. If both these modifiers are set for the same callout number,
\fBcallout_error\fP takes precedence.
.P
Note that callouts with string arguments are always given the number zero. See
"Callouts" below for a description of the output when a callout is taken.
.P
The \fBcallout_data\fP modifier can be given an unsigned or a negative number.
This is set as the "user data" that is passed to the matching function, and
@ -1751,6 +1758,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 23 December 2016
Last updated: 28 December 2016
Copyright (c) 1997-2016 University of Cambridge.
.fi

View File

@ -32,6 +32,17 @@ Written by Philip Hazel, October 2016
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
/* Callout function used while fuzzing. The callout data must point to a
uint32_t counter, which is incremented on every call. Zero is returned while
the counter is no greater than 100; once it exceeds 100, PCRE2_ERROR_CALLOUT is
returned instead, so that not too much time is spent on a fruitless match. */
static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
uint32_t *counter = (uint32_t *)callout_data;
(void)cb;   /* The callout block is not inspected */
*counter += 1;
if (*counter > 100) return PCRE2_ERROR_CALLOUT;
return 0;
}
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
"no previous prototype" warning when compiling at high warning level. */
@ -77,6 +88,7 @@ likewise do the match with and without the options. */
for (i = 0; i < 2; i++)
{
uint32_t callout_count;
int errorcode;
PCRE2_SIZE erroroffset;
pcre2_code *code;
@ -147,8 +159,9 @@ for (i = 0; i < 2; i++)
#endif
return 0;
}
pcre2_set_match_limit(match_context, 100);
pcre2_set_recursion_limit(match_context, 100);
(void)pcre2_set_match_limit(match_context, 100);
(void)pcre2_set_recursion_limit(match_context, 100);
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
}
/* Match twice, with and without options */
@ -168,6 +181,7 @@ for (i = 0; i < 2; i++)
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
#endif
callout_count = 0;
errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)size, 0,
match_options, match_data, match_context);

View File

@ -175,7 +175,7 @@ void vms_setsymbol( char *, char *, int );
#endif
#endif
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
@ -495,6 +495,7 @@ typedef struct datctl { /* Structure for data line modifiers. */
uint32_t control; /* Must be in same position as patctl */
uint32_t control2; /* Must be in same position as patctl */
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
uint32_t cerror[2];
uint32_t cfail[2];
int32_t callout_data;
int32_t copy_numbers[MAXCPYGET];
@ -549,6 +550,7 @@ static modstruct modlist[] = {
{ "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
{ "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
{ "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
{ "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
@ -5459,8 +5461,17 @@ if (callout_data_ptr != NULL)
}
}
return (cb->callout_number != dat_datctl.cfail[0])? 0 :
(++callout_count >= dat_datctl.cfail[1])? 1 : 0;
callout_count++;
if (cb->callout_number == dat_datctl.cerror[0] &&
callout_count >= dat_datctl.cerror[1])
return PCRE2_ERROR_CALLOUT;
if (cb->callout_number == dat_datctl.cfail[0] &&
callout_count >= dat_datctl.cfail[1])
return 1;
return 0;
}
@ -6123,7 +6134,9 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
regmatch_t *pmatch = NULL;
const char *msg = "** Ignored with POSIX interface:";
if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET)
if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
prmsg(&msg, "callout_error");
if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
prmsg(&msg, "callout_fail");
if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
prmsg(&msg, "copy");
@ -7347,7 +7360,8 @@ memset(&def_datctl, 0, sizeof(datctl));
def_datctl.oveccount = DEFAULT_OVECCOUNT;
def_datctl.copy_numbers[0] = -1;
def_datctl.get_numbers[0] = -1;
def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET;
def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
/* Scan command line options. */

7
testdata/testinput2 vendored
View File

@ -1062,8 +1062,8 @@
/(?C0)(abc(?C1))*/I
abcabcabc
abcabc\=callout_fail=1:3
abcabcabc\=callout_fail=1:3
abcabc\=callout_fail=1:4
abcabcabc\=callout_fail=1:4
/(\d{3}(?C))*/I
123\=callout_capture
@ -4956,4 +4956,7 @@ a)"xI
/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/
/abcd/auto_callout
abcd\=callout_error=255:2
# End of testinput2

11
testdata/testoutput2 vendored
View File

@ -3550,14 +3550,14 @@ Subject length lower bound = 0
1 ^ ^ )*
0: abcabcabc
1: abc
abcabc\=callout_fail=1:3
abcabc\=callout_fail=1:4
--->abcabc
0 ^ (
1 ^ ^ )*
1 ^ ^ )*
0: abcabc
1: abc
abcabcabc\=callout_fail=1:3
abcabcabc\=callout_fail=1:4
--->abcabcabc
0 ^ (
1 ^ ^ )*
@ -15439,6 +15439,13 @@ Failed: error 128 at offset 11: assertion expected after (?( or (?(?C)
/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/
/abcd/auto_callout
abcd\=callout_error=255:2
--->abcd
+0 ^ a
+1 ^^ b
Failed: error -37: callout error code
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data