Fix pedantic infelicities shown up by clang and a UTF-8 checking overflow bug.

This commit is contained in:
Philip.Hazel 2015-07-24 13:30:50 +00:00
parent 83c7c24b92
commit 1bcfb856f5
8 changed files with 85 additions and 61 deletions

View File

@ -77,6 +77,10 @@ whose condition was an assertion preceded by an explicit callout with a string
argument might be incorrectly processed, especially if the string contained \Q.
This bug was discovered by Karl Skomski with the LLVM fuzzer.
21. Compiling PCRE2 with the sanitize options of clang showed up a number of
very pedantic coding infelicities and a buffer overflow while checking a UTF-8
string if the final multi-byte UTF-8 character was truncated.
Version 10.20 30-June-2015
--------------------------

55
RunTest
View File

@ -33,6 +33,10 @@
# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may
# be given without the leading "-" character.
#
# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need
# very much more stack than normal. In environments where the stack can be
# set at runtime, -bigstack sets a gigantic stack.
#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
@ -184,6 +188,7 @@ arg8=
arg16=
arg32=
nojit=
bigstack=
sim=
skip=
valgrind=
@ -240,6 +245,7 @@ while [ $# -gt 0 ] ; do
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
bigstack|-bigstack) bigstack=yes;;
nojit|-nojit) nojit=yes;;
sim|-sim) shift; sim=$1;;
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
@ -287,13 +293,22 @@ fi
# If it is possible to set the system stack size, arrange to set a value for
# test 2, which needs more than even the Linux default when PCRE2 has been
# compiled with -fsanitize=address.
# compiled by gcc with -fsanitize=address. When the compiler is clang, sanitize
# options require an even bigger stack for test 2, and an increased stack for
# some of the other tests.
$sim ./pcre2test -S 1 /dev/null /dev/null
if [ $? -eq 0 ] ; then
test2stack="-S 16"
if [ "$bigstack" = "" ] ; then
test2stack="-S 16"
defaultstack=""
else
test2stack="-S 1024"
defaultstack="-S 64"
fi
else
test2stack=""
defaultstack=""
fi
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
@ -438,7 +453,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do1 = yes ] ; then
echo $title1
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput1 testtry
checkresult $? 1 "$opt"
done
fi
@ -508,7 +523,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ "$locale" != "" ] ; then
echo $title3 "(using '$locale' locale)"
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $infile testtry
if [ $? = 0 ] ; then
case "$opt" in
-jit) with=" with JIT";;
@ -545,7 +560,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput4 testtry
checkresult $? 4 "$opt"
done
fi
@ -557,7 +572,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput5 testtry
checkresult $? 5 "$opt"
done
fi
@ -567,7 +582,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do6 = yes ] ; then
echo $title6
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput6 testtry
checkresult $? 6 ""
fi
@ -576,7 +591,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput7 testtry
checkresult $? 7 ""
fi
fi
@ -596,7 +611,7 @@ for bmode in "$test8" "$test16" "$test32"; do
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput8 testtry
checkresult $? 8-$bits ""
fi
fi
@ -609,7 +624,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped when running 16/32-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput9 testtry
checkresult $? 9 "$opt"
done
fi
@ -625,7 +640,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput10 testtry
checkresult $? 10 "$opt"
done
fi
@ -639,7 +654,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped when running 8-bit tests"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput11 testtry
checkresult $? 11-$bits "$opt"
done
fi
@ -656,7 +671,7 @@ for bmode in "$test8" "$test16" "$test32"; do
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
$sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput12 testtry
checkresult $? 12-$bits "$opt"
done
fi
@ -669,7 +684,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput13 testtry
checkresult $? 13 ""
fi
fi
@ -678,7 +693,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do14 = yes ] ; then
echo $title14
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput14 testtry
checkresult $? 14 ""
fi
@ -689,7 +704,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $jit -ne 0 ] ; then
echo " Skipped because JIT is available"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
checkresult $? 15 ""
fi
fi
@ -701,7 +716,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
echo " Skipped because JIT is not available or nojit was specified"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry
checkresult $? 16 ""
fi
fi
@ -713,7 +728,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry
checkresult $? 17 ""
fi
fi
@ -727,7 +742,7 @@ for bmode in "$test8" "$test16" "$test32"; do
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry
checkresult $? 18 ""
fi
fi
@ -736,7 +751,7 @@ for bmode in "$test8" "$test16" "$test32"; do
if [ $do19 = yes ] ; then
echo $title19
$sim $valgrind ./pcre2test -q $bmode $testdata/testinput19 testtry
$sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry
checkresult $? 19 ""
fi

View File

@ -270,7 +270,7 @@ in UTF-8 mode. It runs from '0' to 'z'. */
#ifndef EBCDIC
#define ESCAPES_FIRST CHAR_0
#define ESCAPES_LAST CHAR_z
#define ESCAPES_UPPER_CASE (-32) /* Add this to upper case a letter */
#define UPPER_CASE(c) (c-32)
static const short int escapes[] = {
0, 0,
@ -323,11 +323,11 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#if 'a' == 0x81 /* Check for a real EBCDIC environment */
#define ESCAPES_FIRST CHAR_a
#define ESCAPES_LAST CHAR_9
#define ESCAPES_UPPER_CASE (+64) /* Add this to upper case a letter */
#define UPPER_CASE(c) (c+64)
#else /* Testing in an ASCII environment */
#define ESCAPES_FIRST ((unsigned char)'\x81') /* EBCDIC 'a' */
#define ESCAPES_LAST ((unsigned char)'\xf9') /* EBCDIC '9' */
#define ESCAPES_UPPER_CASE (-32) /* Add this to upper case a letter */
#define UPPER_CASE(c) (c-32)
#endif
static const short int escapes[] = {
@ -1884,7 +1884,7 @@ else
s = cb->bracount - (s - 1);
}
escape = -s;
escape = -(int)s;
break;
/* The handling of escape sequences consisting of a string of digits
@ -1909,7 +1909,7 @@ else
{
oldptr = ptr;
/* The integer range is limited by the machine's int representation. */
s = (int)(c - CHAR_0);
s = c - CHAR_0;
overflow = FALSE;
while (IS_DIGIT(ptr[1]))
{
@ -1933,7 +1933,7 @@ else
if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount)
{
escape = -s; /* Indicates a back reference */
escape = -(int)s; /* Indicates a back reference */
break;
}
ptr = oldptr; /* Put the pointer back and fall through */
@ -1981,7 +1981,7 @@ else
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x20000000l) { overflow = TRUE; break; }
#endif
c = (c << 3) + cc - CHAR_0 ;
c = (c << 3) + (cc - CHAR_0);
#if PCRE2_CODE_UNIT_WIDTH == 8
if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
#elif PCRE2_CODE_UNIT_WIDTH == 16
@ -2105,7 +2105,7 @@ else
#endif
c = *(++ptr);
if (c >= CHAR_a && c <= CHAR_z) c += ESCAPES_UPPER_CASE;
if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c);
if (c == CHAR_NULL && ptr >= cb->end_pattern)
{
*errorcodeptr = ERR2;
@ -3532,7 +3532,7 @@ for (; ptr < cb->end_pattern; ptr++)
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--;
}
nest_depth--;
if (nest_depth > 0) nest_depth--; /* Can be 0 for unmatched ) */
break;
}
}
@ -3938,14 +3938,16 @@ for (;; ptr++)
if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
{
nestptr = ptr + 7;
ptr = sub_start_of_word - 1;
ptr = sub_start_of_word; /* Do not combine these statements; clang's */
ptr--; /* sanitizer moans about a negative index. */
continue;
}
if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
{
nestptr = ptr + 7;
ptr = sub_end_of_word - 1;
ptr = sub_end_of_word; /* Do not combine these statements; clang's */
ptr--; /* sanitizer moans about a negative index. */
continue;
}
@ -5960,7 +5962,7 @@ for (;; ptr++)
goto FAILED;
}
if (refsign != 0) recno = (refsign == CHAR_MINUS)?
cb->bracount - recno + 1 : recno + cb->bracount;
(cb->bracount + 1) - recno : recno + cb->bracount;
if (recno <= 0 || (uint32_t)recno > cb->final_bracount)
{
*errorcodeptr = ERR15;
@ -6490,7 +6492,7 @@ for (;; ptr++)
*errorcodeptr = ERR58;
goto FAILED;
}
recno = cb->bracount - recno + 1;
recno = (int)(cb->bracount + 1) - recno;
if (recno <= 0)
{
*errorcodeptr = ERR15;
@ -8183,7 +8185,7 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
while (IS_DIGIT(ptr[pp]))
{
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
c = c*10 + ptr[pp++] - CHAR_0;
c = c*10 + (ptr[pp++] - CHAR_0);
}
if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
{

View File

@ -3172,7 +3172,7 @@ occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF
#undef OO

View File

@ -194,7 +194,7 @@ if (caseless)
GETCHARINC(c, eptr);
GETCHARINC(d, p);
ur = GET_UCD(d);
if (c != d && c != d + ur->other_case)
if (c != d && c != (uint32_t)((int)d + ur->other_case))
{
const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
for (;;)
@ -211,7 +211,7 @@ if (caseless)
/* Not in UTF mode */
{
while (length-- > 0)
for (; length > 0; length--)
{
uint32_t cc, cp;
if (eptr >= mb->end_subject) return 1; /* Partial match */
@ -226,11 +226,11 @@ if (caseless)
}
/* In the caseful case, we can just compare the code units, whether or not we
are in UT mode. */
are in UTF mode. */
else
{
while (length-- > 0)
for (; length > 0; length--)
{
if (eptr >= mb->end_subject) return 1; /* Partial match */
if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /*No match */
@ -3342,7 +3342,10 @@ for (;;)
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
for (; length > 0; length--)
{
if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
}
}
else
#endif
@ -6513,7 +6516,7 @@ occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF
#undef OO
@ -6783,7 +6786,7 @@ for(;;)
end_subject = t;
}
/* Advance to a unique first code unit if there is one. In 8-bit mode, the
/* Advance to a unique first code unit if there is one. In 8-bit mode, the
use of memchr() gives a big speed up. */
if (has_first_cu)
@ -6801,8 +6804,8 @@ for(;;)
#else
start_match = memchr(start_match, first_cu, end_subject - start_match);
if (start_match == NULL) start_match = end_subject;
#endif
}
#endif
}
}
/* Or to just after a linebreak for a multiline match */

View File

@ -121,7 +121,7 @@ int
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
{
PCRE2_UCHAR c1, c2;
while (len-- > 0)
for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;
@ -150,7 +150,7 @@ int
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
{
PCRE2_UCHAR c1, c2;
while (len-- > 0)
for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;

View File

@ -131,11 +131,13 @@ PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
*/
for (p = string; length-- > 0; p++)
for (p = string; length > 0; p++)
{
register uint32_t ab, d;
c = *p;
length--;
if (c < 128) continue; /* ASCII character */
if (c < 0xc0) /* Isolated 10xx xxxx byte */
@ -324,9 +326,10 @@ PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
*/
for (p = string; length-- > 0; p++)
for (p = string; length > 0; p++)
{
c = *p;
length--;
if ((c & 0xf800) != 0xd800)
{
@ -368,7 +371,7 @@ PCRE2_ERROR_UTF32_ERR1 Surrogate character
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
*/
for (p = string; length-- > 0; p++)
for (p = string; length > 0; length--, p++)
{
c = *p;
if ((c & 0xfffff800u) != 0xd800u)

View File

@ -2606,7 +2606,7 @@ if (pbuffer16_size < 2*len + 2)
pp = pbuffer16;
if (!utf)
{
while (len-- > 0) *pp++ = *p++;
for (; len > 0; len--) *pp++ = *p++;
}
else while (len > 0)
{
@ -2683,7 +2683,7 @@ if (pbuffer32_size < 4*len + 4)
pp = pbuffer32;
if (!utf)
{
while (len-- > 0) *pp++ = *p++;
for (; len > 0; len--) *pp++ = *p++;
}
else while (len > 0)
{
@ -2723,9 +2723,8 @@ Returns: a possibly changed offset
static PCRE2_SIZE
backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
{
long int yield;
if (!utf || test_mode == PCRE32_MODE) yield = offset - count;
if (!utf || test_mode == PCRE32_MODE)
return (count >= offset)? 0 : (offset - count);
else if (test_mode == PCRE8_MODE)
{
@ -2735,7 +2734,7 @@ else if (test_mode == PCRE8_MODE)
pp--;
while ((*pp & 0xc0) == 0x80) pp--;
}
yield = pp - (PCRE2_SPTR8)subject;
return pp - (PCRE2_SPTR8)subject;
}
else /* 16-bit mode */
@ -2746,10 +2745,8 @@ else /* 16-bit mode */
pp--;
if ((*pp & 0xfc00) == 0xdc00) pp--;
}
yield = pp - (PCRE2_SPTR16)subject;
return pp - (PCRE2_SPTR16)subject;
}
return (yield >= 0)? yield : 0;
}
@ -2936,7 +2933,7 @@ while (top > bot)
if (c == 0)
{
if (len == mlen) return mid;
c = len - mlen;
c = (int)len - (int)mlen;
}
if (c > 0) bot = mid + 1; else top = mid;
}
@ -3712,7 +3709,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (namecount > 0)
{
fprintf(outfile, "Named capturing subpatterns:\n");
while (namecount-- > 0)
for (; namecount > 0; namecount--)
{
int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
@ -5378,7 +5375,7 @@ if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
/* Check for mutually exclusive modifiers. */
c = dat_datctl.control & EXCLUSIVE_DAT_CONTROLS;
if (c - (c & -c) != 0)
if (c != 0 && c != (c & (~c+1)))
{
show_controls(c, "** Not allowed together:");
fprintf(outfile, "\n");