From ba1e2e0cbb8a8ee9bd07d403612b8b2d3e315fbe Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 21 Nov 2014 12:19:37 +0000 Subject: [PATCH] Add additional tests and fix some compiler warnings; update stack information. --- RunTest | 15 +++++++++++++-- doc/pcre2stack.3 | 15 +++++++++------ maint/ManyConfigTests | 36 +++++++++++++++++++++++++----------- src/pcre2test.c | 43 ++++++++++++++++++++++--------------------- 4 files changed, 69 insertions(+), 40 deletions(-) diff --git a/RunTest b/RunTest index e7fbc74..8f6d0a0 100755 --- a/RunTest +++ b/RunTest @@ -277,6 +277,17 @@ if [ $link_size -gt 4 ] ; then exit 1 fi +# If it is possible to set the system stack size, arrange to set a value for +# test 2, which needs more than even the Linux default when PCRE2 has been +# compiled with -fsanitize=address. + +$sim ./pcre2test -S 1 /dev/null /dev/null +if [ $? -eq 0 ] ; then + test2stack="-S 16" +else + test2stack="" +fi + # All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only # one need be. @@ -423,12 +434,12 @@ for bmode in "$test8" "$test16" "$test32"; do done fi - # PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals + # PCRE2 tests that are not Perl-compatible: API, errors, internals if [ $do2 = yes ] ; then echo $title2 "(excluding UTF-$bits)" for opt in "" $jitopt; do - $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry + $sim $valgrind ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry if [ $? = 0 ] ; then checkresult $? 2 "$opt" else diff --git a/doc/pcre2stack.3 b/doc/pcre2stack.3 index bb7988a..8711263 100644 --- a/doc/pcre2stack.3 +++ b/doc/pcre2stack.3 @@ -1,4 +1,4 @@ -.TH PCRE2STACK 3 "20 October 2014" "PCRE2 10.00" +.TH PCRE2STACK 3 "21 November 2014" "PCRE2 10.00" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 DISCUSSION OF STACK USAGE" @@ -19,6 +19,12 @@ different numbers of a's. 
Furthermore, in a number of cases where the result of the recursive call would immediately be passed back as the result of the current call (a "tail recursion"), the function is just restarted instead. .P +Each time the internal \fBmatch()\fP function is called recursively, it uses +memory from the process stack. For certain kinds of pattern and data, very +large amounts of stack may be needed, despite the recognition of "tail +recursion". Note that if PCRE2 is compiled with the -fsanitize=address option +of the GCC compiler, the stack requirements are greatly increased. +.P The above comments apply when \fBpcre2_match()\fP is run in its normal interpretive manner. If the compiled pattern was processed by \fBpcre2_jit_compile()\fP, and just-in-time compiling was successful, and the @@ -47,10 +53,7 @@ relevant only for \fBpcre2_match()\fP without the JIT optimization. .SS "Reducing \fBpcre2_match()\fP's stack usage" .rs .sp -Each time that the internal \fBmatch()\fP function is called recursively, it -uses memory from the process stack. For certain kinds of pattern and data, very -large amounts of stack may be needed, despite the recognition of "tail -recursion". You can often reduce the amount of recursion, and therefore the +You can often reduce the amount of recursion, and therefore the amount of stack used, by modifying the pattern that is being matched. Consider, for example, this pattern: .sp @@ -194,6 +197,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 20 October 2014 +Last updated: 21 November 2014 Copyright (c) 1997-2014 University of Cambridge. .fi diff --git a/maint/ManyConfigTests b/maint/ManyConfigTests index 2fd08bc..f39ebba 100755 --- a/maint/ManyConfigTests +++ b/maint/ManyConfigTests @@ -53,7 +53,8 @@ tmp=/tmp/pcre2testing # special test turns optimization on, because it can provoke some compiler # warnings. -CFLAGS="-g -O0" +CFLAGS="-g" +OFLAGS="-O0" ISGCC=0 # If the compiler is gcc, add a lot of warning switches. 
@@ -81,7 +82,7 @@ rm -f /tmp/pcre2ccversion # This function runs a single test with the set of configuration options that # are in $opts. The source directory must be set in srcdir. The function must -# be defined as "runtest()" not "function runtest()" in order to run on +# be defined as "runtest()" not "function runtest()" in order to run on # Solaris. runtest() @@ -125,8 +126,8 @@ runtest() nlok=1 else nlok=0 - fi - + fi + ./pcre2test -C jit >/dev/null jit=$? ./pcre2test -C pcre2-8 >/dev/null @@ -138,7 +139,7 @@ runtest() if [ $? -ne 0 -o -s teststderr ]; then echo " " echo "**** Test failed ****" - cat teststderr + cat teststderr if [ -s teststdout ] ; then cat teststdout; fi exit 1 fi @@ -158,7 +159,7 @@ runtest() fi elif [ $nlok -gt 0 ]; then echo "Skipping pcre2grep tests: 8-bit library not compiled" - else + else echo "Skipping pcre2grep tests: newline is $nl" fi @@ -182,7 +183,7 @@ runtest() testtotal=`expr 20 \* $usemain + \ 1 \* $usetmp + \ - 1 \* $ISGCC \* $usemain + \ + 2 \* $ISGCC \* $usemain + \ 13 \* $usejit + \ \( 3 + 2 \* $usejit \) \* $usevalgrind` testcount=0 @@ -201,7 +202,11 @@ srcdir=. export srcdir # If gcc is in use, run a maximally configured test with -O2, because that can -# throw up warnings that are not detected with -O0. +# throw up warnings that are not detected with -O0. Then run a second test with +# -fsanitize=address, which also may throw up new warnings as well as checking +# things at runtime. Using -fsanitize=address increases the size of stack +# frames by a lot, so run this test with --disable-stack-for-recursion, as +# otherwise RunTest may fail on test 2. 
if [ $usejit -ne 0 ]; then enable_jit=--enable-jit @@ -211,13 +216,22 @@ fi if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then echo "---------- Maximally configured test with -O2 ----------" - SAVECLFAGS="$CFLAGS" - CFLAGS="$CFLAGS -O2" + SAVECFLAGS="$CFLAGS" + CFLAGS="-O2 $CFLAGS" + echo "CFLAGS=$CFLAGS" opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32" runtest - CFLAGS="$SAVECFLAGS" + echo "---------- Maximally configured test with -fsanitize=address ----------" + CFLAGS="$OFLAGS $SAVECFLAGS -fsanitize=address" + echo "CFLAGS=$CFLAGS" + opts="--disable-shared $enable_jit --disable-stack-for-recursion --enable-pcre2-16 --enable-pcre2-32" + runtest + CFLAGS="$OFLAGS $SAVECFLAGS" fi +echo "---------- CFLAGS for the remaining tests ----------" +echo "CFLAGS=$CFLAGS" + if [ $usemain -ne 0 ]; then echo "---------- Non-JIT tests in the current directory ----------" for opts in \ diff --git a/src/pcre2test.c b/src/pcre2test.c index 83f0447..777a980 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -215,7 +215,7 @@ for building the library. */ #include "pcre2posix.h" #include "pcre2_internal.h" -/* We need access to some of the data tables that PCRE uses. Defining +/* We need access to some of the data tables that PCRE2 uses. Defining PCRE2_PCRETEST makes some minor changes in the files. The previous definition of PRIV avoids name clashes. */ @@ -731,7 +731,7 @@ static uint32_t *pbuffer32 = NULL; #define CAST8VAR(x) CASTVAR(uint8_t *, x) #define SET(x,y) SETOP(x,y,=) #define SETPLUS(x,y) SETOP(x,y,+=) -#define strlen8 strlen +#define strlen8(x) strlen((char *)x) /* ---------------- Mode-dependent, runtime-testing macros ------------------*/ @@ -1625,7 +1625,7 @@ the three different cases. 
*/ #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z #define SETOP(x,y,z) G(x,16) z y #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)y -#define STRLEN(p) (int)strlen16(p) +#define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p) #define SUB1(a,b) G(a,16)(G(b,16)) #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16)) #define TEST(x,r,y) (G(x,16) r (y)) @@ -1706,7 +1706,7 @@ the three different cases. */ #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z #define SETOP(x,y,z) G(x,32) z y #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)y -#define STRLEN(p) (int)strlen32(p) +#define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p) #define SUB1(a,b) G(a,32)(G(b,32)) #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32)) #define TEST(x,r,y) (G(x,32) r (y)) @@ -2130,7 +2130,7 @@ return (PCRE2_JIT_STACK *)arg; and returns the codepoint of that character. Note that the function supports the original UTF-8 definition of RFC 2279, allowing for values in the range 0 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate -codepoints greater than 0x10ffff which are useful for testing PCRE's error +codepoints greater than 0x10ffff which are useful for testing PCRE2's error checking, and also for generating 32-bit non-UTF data values above the UTF limit. @@ -2235,7 +2235,7 @@ return n >= 0 ? 
n : 0; * Find length of 0-terminated 16-bit string * *************************************************/ -static int strlen16(PCRE2_SPTR16 p) +static size_t strlen16(PCRE2_SPTR16 p) { PCRE2_SPTR16 pp = p; while (*pp != 0) pp++; @@ -2250,7 +2250,7 @@ return (int)(pp - p); * Find length of 0-terminated 32-bit string * *************************************************/ -static int strlen32(PCRE2_SPTR32 p) +static size_t strlen32(PCRE2_SPTR32 p) { PCRE2_SPTR32 pp = p; while (*pp != 0) pp++; @@ -2430,7 +2430,7 @@ if (pbuffer16_size < 2*len + 2) pbuffer16 = (uint16_t *)malloc(pbuffer16_size); if (pbuffer16 == NULL) { - fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer16\n", + fprintf(stderr, "pcre2test: malloc(%ld) failed for pbuffer16\n", pbuffer16_size); exit(1); } @@ -2507,7 +2507,7 @@ if (pbuffer32_size < 4*len + 4) pbuffer32 = (uint32_t *)malloc(pbuffer32_size); if (pbuffer32 == NULL) { - fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer32\n", + fprintf(stderr, "pcre2test: malloc(%ld) failed for pbuffer32\n", pbuffer32_size); exit(1); } @@ -4114,11 +4114,12 @@ return capcount; * Callout function * *************************************************/ -/* Called from PCRE as a result of the (?C) item. We print out where we are in -the match. Yield zero unless more callouts than the fail count, or the callout -data is not zero. The only differences in the callout block for different code -unit widths are that the pointers to the subject and the most recent MARK point -to strings of the appropriate width. Casts can be used to deal with this. +/* Called from a PCRE2 library as a result of the (?C) item. We print out where +we are in the match. Yield zero unless more callouts than the fail count, or +the callout data is not zero. The only differences in the callout block for +different code unit widths are that the pointers to the subject and the most +recent MARK point to strings of the appropriate width. Casts can be used to +deal with this. 
Argument: a pointer to a callout block Return: @@ -5672,7 +5673,7 @@ for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp); /* Output is always to stdout. Arguments: - rc the return code from PCRE_CONFIG_NEWLINE + rc the return code from PCRE2_CONFIG_NEWLINE isc TRUE if called from "-C newline" Returns: nothing */ @@ -5732,7 +5733,7 @@ printf(" -dfa set default subject control 'dfa'\n"); printf(" -help show usage information\n"); printf(" -i set default pattern control 'info'\n"); printf(" -jit set default pattern control 'jit'\n"); -printf(" -q quiet: do not output PCRE version number at start\n"); +printf(" -q quiet: do not output PCRE2 version number at start\n"); printf(" -pattern set default pattern control fields\n"); printf(" -subject set default subject control fields\n"); printf(" -S set stack size to megabytes\n"); @@ -5982,7 +5983,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) test_mode = PCRE8_MODE; #else fprintf(stderr, - "** This version of PCRE was built without 8-bit support\n"); + "** This version of PCRE2 was built without 8-bit support\n"); exit(1); #endif } @@ -5992,7 +5993,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) test_mode = PCRE16_MODE; #else fprintf(stderr, - "** This version of PCRE was built without 16-bit support\n"); + "** This version of PCRE2 was built without 16-bit support\n"); exit(1); #endif } @@ -6002,7 +6003,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) test_mode = PCRE32_MODE; #else fprintf(stderr, - "** This version of PCRE was built without 32-bit support\n"); + "** This version of PCRE2 was built without 32-bit support\n"); exit(1); #endif } @@ -6017,7 +6018,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) ((stack_size = get_value(argv[op+1], &endptr)), *endptr == 0)) { #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) - fprintf(stderr, "PCRE: -S is not supported on this OS\n"); + fprintf(stderr, 
"pcre2test: -S is not supported on this OS\n"); exit(1); #else int rc; @@ -6027,7 +6028,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) rc = setrlimit(RLIMIT_STACK, &rlim); if (rc != 0) { - fprintf(stderr, "PCRE: setrlimit() failed with error %d\n", rc); + fprintf(stderr, "pcre2test: setrlimit() failed with error %d\n", rc); exit(1); } op++;