370 lines
12 KiB
C
370 lines
12 KiB
C
/***************************************************************************
|
|
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
|
|
tries to compile and match it, deriving options from the string itself. If
|
|
STANDALONE is defined, a main program that calls the driver with the contents
|
|
of specified files is compiled, and commentary on what is happening is output.
|
|
If an argument starts with '=' the rest of it it is taken as a literal string
|
|
rather than a file name. This allows easy testing of short strings.
|
|
|
|
Written by Philip Hazel, October 2016
|
|
***************************************************************************/
|
|
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
|
#include "pcre2.h"
|
|
|
|
#define MAX_MATCH_SIZE 1000
|
|
|
|
#define DFA_WORKSPACE_COUNT 100
|
|
|
|
#define ALLOWED_COMPILE_OPTIONS \
|
|
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
|
|
PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
|
|
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
|
|
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
|
|
PCRE2_NO_AUTO_CAPTURE| \
|
|
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
|
|
PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
|
|
PCRE2_UTF)
|
|
|
|
#define ALLOWED_MATCH_OPTIONS \
|
|
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
|
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
|
|
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
|
|
|
|
/* This is the callout function. Its only purpose is to halt matching if there
|
|
are more than 100 callouts, as one way of stopping too much time being spent on
|
|
fruitless matches. The callout data is a pointer to the counter. */
|
|
|
|
static int callout_function(pcre2_callout_block *cb, void *callout_data)
|
|
{
|
|
(void)cb; /* Avoid unused parameter warning */
|
|
*((uint32_t *)callout_data) += 1;
|
|
return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
|
|
}
|
|
|
|
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
|
|
"no previous prototype" warning when compiling at high warning level. */
|
|
|
|
int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
|
|
|
|
/* Here's the driving function. */
|
|
|
|
int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
|
|
{
|
|
uint32_t compile_options;
|
|
uint32_t match_options;
|
|
pcre2_match_data *match_data = NULL;
|
|
pcre2_match_context *match_context = NULL;
|
|
size_t match_size;
|
|
int dfa_workspace[DFA_WORKSPACE_COUNT];
|
|
int r1, r2;
|
|
int i;
|
|
|
|
if (size < 1) return 0;
|
|
|
|
/* Limiting the length of the subject for matching stops fruitless searches
|
|
in large trees taking too much time. */
|
|
|
|
match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
|
|
|
|
/* Figure out some options to use. Initialize the random number to ensure
|
|
repeatability. Ensure that we get a 32-bit unsigned random number for testing
|
|
options. (RAND_MAX is required to be at least 32767, but is commonly
|
|
2147483647, which excludes the top bit.) */
|
|
|
|
srand((unsigned int)(data[size/2]));
|
|
r1 = rand();
|
|
r2 = rand();
|
|
|
|
/* Ensure that all undefined option bits are zero (waste of time trying them)
|
|
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
|
|
input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
|
|
reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
|
|
\C in random patterns is highly likely to cause a crash. */
|
|
|
|
compile_options =
|
|
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
|
|
PCRE2_NEVER_BACKSLASH_C;
|
|
|
|
match_options =
|
|
((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
|
|
|
|
/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
|
|
allowed together and just give an immediate error return. */
|
|
|
|
if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
|
|
match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
|
|
|
|
/* Do the compile with and without the options, and after a successful compile,
|
|
likewise do the match with and without the options. */
|
|
|
|
for (i = 0; i < 2; i++)
|
|
{
|
|
uint32_t callout_count;
|
|
int errorcode;
|
|
PCRE2_SIZE erroroffset;
|
|
pcre2_code *code;
|
|
|
|
#ifdef STANDALONE
|
|
printf("Compile options %.8x never_backslash_c", compile_options);
|
|
printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
|
((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
|
|
((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
|
|
((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
|
|
((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
|
|
((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
|
((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
|
|
((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
|
|
((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
|
|
((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
|
|
((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
|
|
((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
|
((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
|
|
((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
|
|
((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
|
|
((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
|
|
((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
|
|
((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
|
|
((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
|
|
((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
|
|
((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
|
|
((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
|
((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
|
|
((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
|
|
((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
|
|
((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
|
|
((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
|
|
#endif
|
|
|
|
code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
|
|
&errorcode, &erroroffset, NULL);
|
|
|
|
/* Compilation succeeded */
|
|
|
|
if (code != NULL)
|
|
{
|
|
int j;
|
|
uint32_t save_match_options = match_options;
|
|
|
|
#ifdef SUPPORT_JIT
|
|
pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
|
|
#endif
|
|
|
|
/* Create match data and context blocks only when we first need them. Set
|
|
low match and depth limits to avoid wasting too much searching large
|
|
pattern trees. Almost all matches are going to fail. */
|
|
|
|
if (match_data == NULL)
|
|
{
|
|
match_data = pcre2_match_data_create(32, NULL);
|
|
if (match_data == NULL)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("** Failed to create match data block\n");
|
|
#endif
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (match_context == NULL)
|
|
{
|
|
match_context = pcre2_match_context_create(NULL);
|
|
if (match_context == NULL)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("** Failed to create match context block\n");
|
|
#endif
|
|
return 0;
|
|
}
|
|
(void)pcre2_set_match_limit(match_context, 100);
|
|
(void)pcre2_set_depth_limit(match_context, 100);
|
|
(void)pcre2_set_callout(match_context, callout_function, &callout_count);
|
|
}
|
|
|
|
/* Match twice, with and without options. */
|
|
|
|
for (j = 0; j < 2; j++)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("Match options %.8x", match_options);
|
|
printf("%s%s%s%s%s%s%s%s%s%s\n",
|
|
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
|
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
|
((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
|
|
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
|
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
|
|
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
|
|
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
|
|
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
|
|
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
|
|
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
|
|
#endif
|
|
|
|
callout_count = 0;
|
|
errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
|
|
match_options, match_data, match_context);
|
|
|
|
#ifdef STANDALONE
|
|
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
|
{
|
|
unsigned char buffer[256];
|
|
pcre2_get_error_message(errorcode, buffer, 256);
|
|
printf("Match failed: error %d: %s\n", errorcode, buffer);
|
|
}
|
|
#endif
|
|
|
|
match_options = 0; /* For second time */
|
|
}
|
|
|
|
/* Match with DFA twice, with and without options. */
|
|
|
|
match_options = save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */
|
|
|
|
for (j = 0; j < 2; j++)
|
|
{
|
|
#ifdef STANDALONE
|
|
printf("DFA match options %.8x", match_options);
|
|
printf("%s%s%s%s%s%s%s%s%s\n",
|
|
((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
|
|
((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
|
|
((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
|
|
((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
|
|
((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
|
|
((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
|
|
((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
|
|
((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
|
|
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
|
|
#endif
|
|
|
|
callout_count = 0;
|
|
errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
|
|
(PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
|
|
dfa_workspace, DFA_WORKSPACE_COUNT);
|
|
|
|
#ifdef STANDALONE
|
|
if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
|
|
{
|
|
unsigned char buffer[256];
|
|
pcre2_get_error_message(errorcode, buffer, 256);
|
|
printf("Match failed: error %d: %s\n", errorcode, buffer);
|
|
}
|
|
#endif
|
|
|
|
match_options = 0; /* For second time */
|
|
}
|
|
|
|
match_options = save_match_options; /* Reset for the second compile */
|
|
pcre2_code_free(code);
|
|
}
|
|
|
|
/* Compilation failed */
|
|
|
|
else
|
|
{
|
|
unsigned char buffer[256];
|
|
pcre2_get_error_message(errorcode, buffer, 256);
|
|
#ifdef STANDALONE
|
|
printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
|
|
#else
|
|
if (strstr((const char *)buffer, "internal error") != NULL) abort();
|
|
#endif
|
|
}
|
|
|
|
compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
|
|
}
|
|
|
|
if (match_data != NULL) pcre2_match_data_free(match_data);
|
|
if (match_context != NULL) pcre2_match_context_free(match_context);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Optional main program. */
|
|
|
|
#ifdef STANDALONE
|
|
int main(int argc, char **argv)
|
|
{
|
|
int i;
|
|
|
|
if (argc < 2)
|
|
{
|
|
printf("** No arguments given\n");
|
|
return 0;
|
|
}
|
|
|
|
for (i = 1; i < argc; i++)
|
|
{
|
|
size_t filelen;
|
|
size_t readsize;
|
|
unsigned char *buffer;
|
|
FILE *f;
|
|
|
|
/* Handle a literal string. Copy to an exact size buffer so that checks for
|
|
overrunning work. */
|
|
|
|
if (argv[i][0] == '=')
|
|
{
|
|
readsize = strlen(argv[i]) - 1;
|
|
printf("------ <Literal> ------\n");
|
|
printf("Length = %lu\n", readsize);
|
|
printf("%.*s\n", (int)readsize, argv[i]+1);
|
|
buffer = (unsigned char *)malloc(readsize);
|
|
if (buffer == NULL)
|
|
printf("** Failed to allocate %lu bytes of memory\n", readsize);
|
|
else
|
|
{
|
|
memcpy(buffer, argv[i]+1, readsize);
|
|
LLVMFuzzerTestOneInput(buffer, readsize);
|
|
free(buffer);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* Handle a string given in a file */
|
|
|
|
f = fopen(argv[i], "rb");
|
|
if (f == NULL)
|
|
{
|
|
printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
|
|
continue;
|
|
}
|
|
|
|
printf("------ %s ------\n", argv[i]);
|
|
|
|
fseek(f, 0, SEEK_END);
|
|
filelen = ftell(f);
|
|
fseek(f, 0, SEEK_SET);
|
|
|
|
buffer = (unsigned char *)malloc(filelen);
|
|
if (buffer == NULL)
|
|
{
|
|
printf("** Failed to allocate %lu bytes of memory\n", filelen);
|
|
fclose(f);
|
|
continue;
|
|
}
|
|
|
|
readsize = fread(buffer, 1, filelen, f);
|
|
fclose(f);
|
|
|
|
if (readsize != filelen)
|
|
printf("** File size is %lu but fread() returned %lu\n", filelen, readsize);
|
|
else
|
|
{
|
|
printf("Length = %lu\n", filelen);
|
|
LLVMFuzzerTestOneInput(buffer, filelen);
|
|
}
|
|
free(buffer);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif /* STANDALONE */
|
|
|
|
/* End */
|