Add additional tests and fix some compiler warnings; update stack information.

This commit is contained in:
Philip.Hazel 2014-11-21 12:19:37 +00:00
parent 69176e79a5
commit ba1e2e0cbb
4 changed files with 69 additions and 40 deletions

15
RunTest
View File

@ -277,6 +277,17 @@ if [ $link_size -gt 4 ] ; then
exit 1 exit 1
fi fi
# If it is possible to set the system stack size, arrange to set a value for
# test 2, which needs more than even the Linux default when PCRE2 has been
# compiled with -fsanitize=address.
$sim ./pcre2test -S 1 /dev/null /dev/null
if [ $? -eq 0 ] ; then
test2stack="-S 16"
else
test2stack=""
fi
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only # All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
# one need be. # one need be.
@ -423,12 +434,12 @@ for bmode in "$test8" "$test16" "$test32"; do
done done
fi fi
# PCRE2 tests that are not JIT or Perl-compatible: API, errors, internals # PCRE2 tests that are not Perl-compatible: API, errors, internals
if [ $do2 = yes ] ; then if [ $do2 = yes ] ; then
echo $title2 "(excluding UTF-$bits)" echo $title2 "(excluding UTF-$bits)"
for opt in "" $jitopt; do for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput2 testtry $sim $valgrind ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then if [ $? = 0 ] ; then
checkresult $? 2 "$opt" checkresult $? 2 "$opt"
else else

View File

@ -1,4 +1,4 @@
.TH PCRE2STACK 3 "20 October 2014" "PCRE2 10.00" .TH PCRE2STACK 3 "21 November 2014" "PCRE2 10.00"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 DISCUSSION OF STACK USAGE" .SH "PCRE2 DISCUSSION OF STACK USAGE"
@ -19,6 +19,12 @@ different numbers of a's. Furthermore, in a number of cases where the result of
the recursive call would immediately be passed back as the result of the the recursive call would immediately be passed back as the result of the
current call (a "tail recursion"), the function is just restarted instead. current call (a "tail recursion"), the function is just restarted instead.
.P .P
Each time the internal \fBmatch()\fP function is called recursively, it uses
memory from the process stack. For certain kinds of pattern and data, very
large amounts of stack may be needed, despite the recognition of "tail
recursion". Note that if PCRE2 is compiled with the -fsanitize=address option
of the GCC compiler, the stack requirements are greatly increased.
.P
The above comments apply when \fBpcre2_match()\fP is run in its normal The above comments apply when \fBpcre2_match()\fP is run in its normal
interpretive manner. If the compiled pattern was processed by interpretive manner. If the compiled pattern was processed by
\fBpcre2_jit_compile()\fP, and just-in-time compiling was successful, and the \fBpcre2_jit_compile()\fP, and just-in-time compiling was successful, and the
@ -47,10 +53,7 @@ relevant only for \fBpcre2_match()\fP without the JIT optimization.
.SS "Reducing \fBpcre2_match()\fP's stack usage" .SS "Reducing \fBpcre2_match()\fP's stack usage"
.rs .rs
.sp .sp
Each time that the internal \fBmatch()\fP function is called recursively, it You can often reduce the amount of recursion, and therefore the
uses memory from the process stack. For certain kinds of pattern and data, very
large amounts of stack may be needed, despite the recognition of "tail
recursion". You can often reduce the amount of recursion, and therefore the
amount of stack used, by modifying the pattern that is being matched. Consider, amount of stack used, by modifying the pattern that is being matched. Consider,
for example, this pattern: for example, this pattern:
.sp .sp
@ -194,6 +197,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 20 October 2014 Last updated: 21 November 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
.fi .fi

View File

@ -53,7 +53,8 @@ tmp=/tmp/pcre2testing
# special test turns optimization on, because it can provoke some compiler # special test turns optimization on, because it can provoke some compiler
# warnings. # warnings.
CFLAGS="-g -O0" CFLAGS="-g"
OFLAGS="-O0"
ISGCC=0 ISGCC=0
# If the compiler is gcc, add a lot of warning switches. # If the compiler is gcc, add a lot of warning switches.
@ -182,7 +183,7 @@ runtest()
testtotal=`expr 20 \* $usemain + \ testtotal=`expr 20 \* $usemain + \
1 \* $usetmp + \ 1 \* $usetmp + \
1 \* $ISGCC \* $usemain + \ 2 \* $ISGCC \* $usemain + \
13 \* $usejit + \ 13 \* $usejit + \
\( 3 + 2 \* $usejit \) \* $usevalgrind` \( 3 + 2 \* $usejit \) \* $usevalgrind`
testcount=0 testcount=0
@ -201,7 +202,11 @@ srcdir=.
export srcdir export srcdir
# If gcc is in use, run a maximally configured test with -O2, because that can # If gcc is in use, run a maximally configured test with -O2, because that can
# throw up warnings that are not detected with -O0. # throw up warnings that are not detected with -O0. Then run a second test with
# -fsanitize=address, which also may throw up new warnings as well as checking
# things at runtime. Using -fsanitize=address increases the size of stack
# frames by a lot, so run this test with --disable-stack-for-recursion, as
# otherwise RunTest may fail on test 2.
if [ $usejit -ne 0 ]; then if [ $usejit -ne 0 ]; then
enable_jit=--enable-jit enable_jit=--enable-jit
@ -211,13 +216,22 @@ fi
if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then
echo "---------- Maximally configured test with -O2 ----------" echo "---------- Maximally configured test with -O2 ----------"
SAVECLFAGS="$CFLAGS" SAVECFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -O2" CFLAGS="-O2 $CFLAGS"
echo "CFLAGS=$CFLAGS"
opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32" opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32"
runtest runtest
CFLAGS="$SAVECFLAGS" echo "---------- Maximally configured test with -fsanitize=address ----------"
CFLAGS="$OFLAGS $SAVECFLAGS -fsanitize=address"
echo "CFLAGS=$CFLAGS"
opts="--disable-shared $enable_jit --disable-stack-for-recursion --enable-pcre2-16 --enable-pcre2-32"
runtest
CFLAGS="$OFLAGS $SAVECFLAGS"
fi fi
echo "---------- CFLAGS for the remaining tests ----------"
echo "CFLAGS=$CFLAGS"
if [ $usemain -ne 0 ]; then if [ $usemain -ne 0 ]; then
echo "---------- Non-JIT tests in the current directory ----------" echo "---------- Non-JIT tests in the current directory ----------"
for opts in \ for opts in \

View File

@ -215,7 +215,7 @@ for building the library. */
#include "pcre2posix.h" #include "pcre2posix.h"
#include "pcre2_internal.h" #include "pcre2_internal.h"
/* We need access to some of the data tables that PCRE uses. Defining /* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRETEST makes some minor changes in the files. The previous definition PCRE2_PCRETEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */ of PRIV avoids name clashes. */
@ -731,7 +731,7 @@ static uint32_t *pbuffer32 = NULL;
#define CAST8VAR(x) CASTVAR(uint8_t *, x) #define CAST8VAR(x) CASTVAR(uint8_t *, x)
#define SET(x,y) SETOP(x,y,=) #define SET(x,y) SETOP(x,y,=)
#define SETPLUS(x,y) SETOP(x,y,+=) #define SETPLUS(x,y) SETOP(x,y,+=)
#define strlen8 strlen #define strlen8(x) strlen((char *)x)
/* ---------------- Mode-dependent, runtime-testing macros ------------------*/ /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
@ -1625,7 +1625,7 @@ the three different cases. */
#define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
#define SETOP(x,y,z) G(x,16) z y #define SETOP(x,y,z) G(x,16) z y
#define SETCASTPTR(x,y) G(x,16) = (uint16_t *)y #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)y
#define STRLEN(p) (int)strlen16(p) #define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
#define SUB1(a,b) G(a,16)(G(b,16)) #define SUB1(a,b) G(a,16)(G(b,16))
#define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16)) #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
#define TEST(x,r,y) (G(x,16) r (y)) #define TEST(x,r,y) (G(x,16) r (y))
@ -1706,7 +1706,7 @@ the three different cases. */
#define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
#define SETOP(x,y,z) G(x,32) z y #define SETOP(x,y,z) G(x,32) z y
#define SETCASTPTR(x,y) G(x,32) = (uint32_t *)y #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)y
#define STRLEN(p) (int)strlen32(p) #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
#define SUB1(a,b) G(a,32)(G(b,32)) #define SUB1(a,b) G(a,32)(G(b,32))
#define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32)) #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
#define TEST(x,r,y) (G(x,32) r (y)) #define TEST(x,r,y) (G(x,32) r (y))
@ -2130,7 +2130,7 @@ return (PCRE2_JIT_STACK *)arg;
and returns the codepoint of that character. Note that the function supports and returns the codepoint of that character. Note that the function supports
the original UTF-8 definition of RFC 2279, allowing for values in the range 0 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
to 0x7fffffff, up to 6 bytes long. This makes it possible to generate to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
codepoints greater than 0x10ffff which are useful for testing PCRE's error codepoints greater than 0x10ffff which are useful for testing PCRE2's error
checking, and also for generating 32-bit non-UTF data values above the UTF checking, and also for generating 32-bit non-UTF data values above the UTF
limit. limit.
@ -2235,7 +2235,7 @@ return n >= 0 ? n : 0;
* Find length of 0-terminated 16-bit string * * Find length of 0-terminated 16-bit string *
*************************************************/ *************************************************/
static int strlen16(PCRE2_SPTR16 p) static size_t strlen16(PCRE2_SPTR16 p)
{ {
PCRE2_SPTR16 pp = p; PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++; while (*pp != 0) pp++;
@ -2250,7 +2250,7 @@ return (int)(pp - p);
* Find length of 0-terminated 32-bit string * * Find length of 0-terminated 32-bit string *
*************************************************/ *************************************************/
static int strlen32(PCRE2_SPTR32 p) static size_t strlen32(PCRE2_SPTR32 p)
{ {
PCRE2_SPTR32 pp = p; PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++; while (*pp != 0) pp++;
@ -2430,7 +2430,7 @@ if (pbuffer16_size < 2*len + 2)
pbuffer16 = (uint16_t *)malloc(pbuffer16_size); pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
if (pbuffer16 == NULL) if (pbuffer16 == NULL)
{ {
fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer16\n", fprintf(stderr, "pcre2test: malloc(%ld) failed for pbuffer16\n",
pbuffer16_size); pbuffer16_size);
exit(1); exit(1);
} }
@ -2507,7 +2507,7 @@ if (pbuffer32_size < 4*len + 4)
pbuffer32 = (uint32_t *)malloc(pbuffer32_size); pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
if (pbuffer32 == NULL) if (pbuffer32 == NULL)
{ {
fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer32\n", fprintf(stderr, "pcre2test: malloc(%ld) failed for pbuffer32\n",
pbuffer32_size); pbuffer32_size);
exit(1); exit(1);
} }
@ -4114,11 +4114,12 @@ return capcount;
* Callout function * * Callout function *
*************************************************/ *************************************************/
/* Called from PCRE as a result of the (?C) item. We print out where we are in /* Called from a PCRE2 library as a result of the (?C) item. We print out where
the match. Yield zero unless more callouts than the fail count, or the callout we are in the match. Yield zero unless more callouts than the fail count, or
data is not zero. The only differences in the callout block for different code the callout data is not zero. The only differences in the callout block for
unit widths are that the pointers to the subject and the most recent MARK point different code unit widths are that the pointers to the subject and the most
to strings of the appropriate width. Casts can be used to deal with this. recent MARK point to strings of the appropriate width. Casts can be used to
deal with this.
Argument: a pointer to a callout block Argument: a pointer to a callout block
Return: Return:
@ -5672,7 +5673,7 @@ for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
/* Output is always to stdout. /* Output is always to stdout.
Arguments: Arguments:
rc the return code from PCRE_CONFIG_NEWLINE rc the return code from PCRE2_CONFIG_NEWLINE
isc TRUE if called from "-C newline" isc TRUE if called from "-C newline"
Returns: nothing Returns: nothing
*/ */
@ -5732,7 +5733,7 @@ printf(" -dfa set default subject control 'dfa'\n");
printf(" -help show usage information\n"); printf(" -help show usage information\n");
printf(" -i set default pattern control 'info'\n"); printf(" -i set default pattern control 'info'\n");
printf(" -jit set default pattern control 'jit'\n"); printf(" -jit set default pattern control 'jit'\n");
printf(" -q quiet: do not output PCRE version number at start\n"); printf(" -q quiet: do not output PCRE2 version number at start\n");
printf(" -pattern <s> set default pattern control fields\n"); printf(" -pattern <s> set default pattern control fields\n");
printf(" -subject <s> set default subject control fields\n"); printf(" -subject <s> set default subject control fields\n");
printf(" -S <n> set stack size to <n> megabytes\n"); printf(" -S <n> set stack size to <n> megabytes\n");
@ -5982,7 +5983,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
test_mode = PCRE8_MODE; test_mode = PCRE8_MODE;
#else #else
fprintf(stderr, fprintf(stderr,
"** This version of PCRE was built without 8-bit support\n"); "** This version of PCRE2 was built without 8-bit support\n");
exit(1); exit(1);
#endif #endif
} }
@ -5992,7 +5993,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
test_mode = PCRE16_MODE; test_mode = PCRE16_MODE;
#else #else
fprintf(stderr, fprintf(stderr,
"** This version of PCRE was built without 16-bit support\n"); "** This version of PCRE2 was built without 16-bit support\n");
exit(1); exit(1);
#endif #endif
} }
@ -6002,7 +6003,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
test_mode = PCRE32_MODE; test_mode = PCRE32_MODE;
#else #else
fprintf(stderr, fprintf(stderr,
"** This version of PCRE was built without 32-bit support\n"); "** This version of PCRE2 was built without 32-bit support\n");
exit(1); exit(1);
#endif #endif
} }
@ -6017,7 +6018,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
((stack_size = get_value(argv[op+1], &endptr)), *endptr == 0)) ((stack_size = get_value(argv[op+1], &endptr)), *endptr == 0))
{ {
#if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
fprintf(stderr, "PCRE: -S is not supported on this OS\n"); fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
exit(1); exit(1);
#else #else
int rc; int rc;
@ -6027,7 +6028,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
rc = setrlimit(RLIMIT_STACK, &rlim); rc = setrlimit(RLIMIT_STACK, &rlim);
if (rc != 0) if (rc != 0)
{ {
fprintf(stderr, "PCRE: setrlimit() failed with error %d\n", rc); fprintf(stderr, "pcre2test: setrlimit() failed with error %d\n", rc);
exit(1); exit(1);
} }
op++; op++;