diff --git a/ChangeLog b/ChangeLog index 5ef9a46..997eb1a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -112,6 +112,11 @@ only when PCRE2_NO_START_OPTIMIZE was *not* set: 16. The "offset" modifier in pcre2test was not being ignored (as documented) when the POSIX API was in use. +17. Added --enable-fuzz-support to "configure", causing an non-installed + library containing a test function that can be called by fuzzers to be + compiled. A non-installed binary to run the test function locally, called + pcre2fuzzcheck is also compiled. + Version 10.22 29-July-2016 -------------------------- diff --git a/Makefile.am b/Makefile.am index 951d844..6386f81 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,7 +2,6 @@ AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 -AM_CPPFLAGS = -I$(builddir)/src -I$(srcdir)/src ## Specify the documentation files that are distributed. @@ -473,7 +472,7 @@ libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS) endif # WITH_GCOV endif # WITH_PCRE2_8 -## Build pcre2grep if the 8-bit library is enabled +## Build pcre2grep and optional fuzzer stuff if the 8-bit library is enabled if WITH_PCRE2_8 bin_PROGRAMS += pcre2grep @@ -485,6 +484,22 @@ if WITH_GCOV pcre2grep_CFLAGS += $(GCOV_CFLAGS) pcre2grep_LDADD += $(GCOV_LIBS) endif # WITH_GCOV + +## If fuzzer support is enabled, build a non-distributed library containing the +## fuzzing function. Also build the standalone checking binary from the same +## source but using -DSTANDALONE. + +if WITH_FUZZ_SUPPORT +noinst_LIBRARIES = .libs/libpcre2-fuzzsupport.a +_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) +_libs_libpcre2_fuzzsupport_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck +pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS) +pcre2fuzzcheck_LDADD = libpcre2-8.la +endif # WITH FUZZ_SUPPORT endif # WITH_PCRE2_8 ## -------- Testing ---------- diff --git a/configure.ac b/configure.ac index 26e3371..fc5cea2 100644 --- a/configure.ac +++ b/configure.ac @@ -295,7 +295,7 @@ AC_ARG_WITH(match-limit-recursion, # Handle --enable-valgrind AC_ARG_ENABLE(valgrind, AS_HELP_STRING([--enable-valgrind], - [valgrind support]), + [enable valgrind support]), , enable_valgrind=no) # Enable code coverage reports using gcov @@ -304,6 +304,12 @@ AC_ARG_ENABLE(coverage, [enable code coverage reports using gcov]), , enable_coverage=no) +# Handle --enable-fuzz-support +AC_ARG_ENABLE(fuzz_support, + AS_HELP_STRING([--enable-fuzz-support], + [enable fuzzer support]), + , enable_fuzz_support=no) + # Set the default value for pcre2-8 if test "x$enable_pcre2_8" = "xunset" then @@ -420,6 +426,12 @@ AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xy AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes") AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes") AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes") +AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes") + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi # Checks for typedefs, structures, and compiler characteristics. @@ -966,6 +978,7 @@ $PACKAGE-$VERSION configuration summary: Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline} Valgrind support ................... : ${enable_valgrind} Code coverage ...................... : ${enable_coverage} + Fuzzer support ..................... : ${enable_fuzz_support} EOF diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c new file mode 100644 index 0000000..2c0e56a --- /dev/null +++ b/src/pcre2_fuzzsupport.c @@ -0,0 +1,271 @@ +/*************************************************************************** +Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it +tries to compile and match it, deriving options from the string itself. If +STANDALONE is defined, a main program that calls the driver with the contents +of specified files is compiled, and commentary on what is happening is output. +If an argument starts with '=' the rest of it it is taken as a literal string +rather than a file name. This allows easy testing of short strings. + +Written by Philip Hazel, October 2016 +***************************************************************************/ + +#include +#include +#include +#include + +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" + +#define ALLOWED_COMPILE_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ + PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ + PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ + PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ + PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \ + PCRE2_UTF) + +#define ALLOWED_MATCH_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \ + PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT) + +/* Putting in this apparently unnecessary prototype prevents gcc from giving a +"no previous prototype" warning when compiling at high warning level. */ + +int LLVMFuzzerTestOneInput(const unsigned char *, size_t); + +/* Here's the driving function. */ + +int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) +{ +uint32_t compile_options; +uint32_t match_options; +pcre2_match_data *match_data = NULL; +int r1, r2; +int i; + +if (size < 1) return 0; + +/* Figure out some options to use. Initialize the random number to ensure +repeatability. Ensure that we get a 32-bit unsigned random number for testing +options. (RAND_MAX is required to be at least 32767, but is commonly +2147483647, which excludes the top bit.) */ + +srand((unsigned int)(data[size/2])); +r1 = rand(); +r2 = rand(); + +/* Ensure that all undefined option bits are zero (waste of time trying them) +and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the +input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no +reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because +\C in random patterns is highly likely to cause a crash. */ + +compile_options = + ((uint32_t)((r1 << 16) | (r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) | + PCRE2_NEVER_BACKSLASH_C; + +match_options = + ((uint32_t)((r1 << 16) | (r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS); + +/* Do the compile with and without the options, and after a successful compile, +likewise do the match with and without the options. */ + +for (i = 0; i < 2; i++) + { + int errorcode; + PCRE2_SIZE erroroffset; + pcre2_code *code; + +#ifdef STANDALONE + printf("Compile with options %.8x", compile_options); + printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "", + ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "", + ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "", + ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "", + ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "", + ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "", + ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "", + ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "", + ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "", + ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "", + ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "", + ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "", + ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "", + ((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? ",never_backslash_c" : "", + ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "", + ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "", + ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "", + ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "", + ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "", + ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "", + ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "", + ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "", + ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "", + ((compile_options & PCRE2_UTF) != 0)? ",utf" : ""); +#endif + + code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options, + &errorcode, &erroroffset, NULL); + + if (code != NULL) + { + int j; + uint32_t save_match_options = match_options; + + /* Create a match data block only when we first need it. */ + + if (match_data == NULL) + { + match_data = pcre2_match_data_create(32, NULL); + if (match_data == NULL) + { +#ifdef STANDALONE + printf("** Failed to create match data block\n"); +#endif + return 0; + } + } + + /* Match twice, with and without options */ + + for (j = 0; j < 2; j++) + { +#ifdef STANDALONE + printf("Match with options %.8x", match_options); + printf("%s%s%s%s%s%s%s%s\n", + ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "", + ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "", + ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "", + ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "", + ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "", + ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : ""); +#endif + + errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)size, 0, + match_options, match_data, NULL); + +#ifdef STANDALONE + if (errorcode >= 0) printf("Match returned %d\n", errorcode); else + { + unsigned char buffer[256]; + pcre2_get_error_message(errorcode, buffer, 256); + printf("Match failed: error %d: %s\n", errorcode, buffer); + } +#endif + + match_options = 0; /* For second time */ + } + + match_options = save_match_options; /* Reset for the second compile */ + pcre2_code_free(code); + } + +#ifdef STANDALONE + else + { + unsigned char buffer[256]; + pcre2_get_error_message(errorcode, buffer, 256); + printf("Compile failed: error %d at offset %lu: %s\n", errorcode, + erroroffset, buffer); + } +#endif + + compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */ + } + +if (match_data != NULL) pcre2_match_data_free(match_data); +return 0; +} + + +/* Optional main program. */ + +#ifdef STANDALONE +int main(int argc, char **argv) +{ +int i; + +if (argc < 2) + { + printf("** No arguments given\n"); + return 0; + } + +for (i = 1; i < argc; i++) + { + size_t filelen; + size_t readsize; + unsigned char *buffer; + FILE *f; + + /* Handle a literal string. Copy to an exact size buffer so that checks for + overrunning work. */ + + if (argv[i][0] == '=') + { + readsize = strlen(argv[i]) - 1; + printf("------ ------\n"); + printf("Length = %lu\n", readsize); + printf("%.*s\n", (int)readsize, argv[i]+1); + buffer = (unsigned char *)malloc(readsize); + if (buffer == NULL) + printf("** Failed to allocate %lu bytes of memory\n", readsize); + else + { + memcpy(buffer, argv[i]+1, readsize); + LLVMFuzzerTestOneInput(buffer, readsize); + free(buffer); + } + continue; + } + + /* Handle a string given in a file */ + + f = fopen(argv[i], "rb"); + if (f == NULL) + { + printf("** Failed to open %s: %s\n", argv[i], strerror(errno)); + continue; + } + + printf("------ %s ------\n", argv[i]); + + fseek(f, 0, SEEK_END); + filelen = ftell(f); + fseek(f, 0, SEEK_SET); + + buffer = (unsigned char *)malloc(filelen); + if (buffer == NULL) + { + printf("** Failed to allocate %lu bytes of memory\n", filelen); + fclose(f); + continue; + } + + readsize = fread(buffer, 1, filelen, f); + fclose(f); + + if (readsize != filelen) + printf("** File size is %lu but fread() returned %lu\n", filelen, readsize); + else + { + printf("Length = %lu\n", filelen); + LLVMFuzzerTestOneInput(buffer, filelen); + } + free(buffer); + } + +return 0; +} +#endif /* STANDALONE */ + +/* End */