Ticket #7452: Properly interpret escape sequences in character literals.

This commit is contained in:
Simon Martin 2016-05-15 15:48:24 +02:00
parent 9d1302d523
commit 3af30e728c
3 changed files with 102 additions and 53 deletions

View File

@ -335,15 +335,6 @@ MathLib::biguint MathLib::toULongNumber(const std::string & str)
return ret;
}
/** @brief Tell whether every character of @p str is an octal digit (true for an empty string). */
static bool isOctalDigitString(const std::string& str)
{
    // Advance to the first character that is not an octal digit, if any.
    std::string::const_iterator pos = str.begin();
    const std::string::const_iterator last = str.end();
    while (pos != last && MathLib::isOctalDigit(*pos))
        ++pos;
    // Reaching the end means no offending character was found.
    return pos == last;
}
static unsigned int encodeMultiChar(const std::string& str)
{
unsigned int retval(str.front());
@ -357,30 +348,50 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
{
if (str.empty())
return 0; // for unit-testing...
if (str.size()==1)
return str[0] & 0xff;
if (str[0] != '\\') {
// C99 6.4.4.4
// The value of an integer character constant containing more than one character (e.g., 'ab'),
// or containing a character or escape sequence that does not map to a single-byte execution character,
// is implementation-defined.
// clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
return encodeMultiChar(str);
}
const std::string& str1 = str.substr(1);
switch (str1[0]) {
case 'x':
return toLongNumber("0x" + str.substr(2));
case 'u': // 16-bit unicode character
return encodeMultiChar(str1);
case 'U': // 32-bit unicode character
return encodeMultiChar(str1);
default: {
char c;
switch (str.size()-1) {
case 1:
switch (str[1]) {
// C99 6.4.4.4
// The value of an integer character constant containing more than one character (e.g., 'ab'),
// or containing a character or escape sequence that does not map to a single-byte execution character,
// is implementation-defined.
// clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
const std::string& normStr = normalizeCharacterLiteral(str);
return encodeMultiChar(normStr);
}
/**
 * @brief Replace every escape sequence of a character literal by the character it denotes.
 *
 * Characters that are not part of an escape sequence are copied unchanged.
 * The recognised sequences, each introduced by a backslash, are:
 *  - 'x' followed by one or two characters: converted through toULongNumber as a hexadecimal number
 *  - 'u' or 'U': only the marker is dropped, the characters after it are copied as-is
 *  - one to three octal digits: converted to the character with that octal value
 *  - one of a, b, e, f, n, r, t, v, backslash, question mark, single quote, double quote
 *
 * An escape sequence may appear at any position, and the character that follows
 * a short (one or two digit) octal sequence is preserved.
 *
 * @param iLiteral A character literal
 * @return The equivalent character literal with all escapes interpreted
 * @throws InternalError if the literal ends right after a backslash or after the
 *         hexadecimal marker, or if the escape sequence is not supported
 */
std::string MathLib::normalizeCharacterLiteral(const std::string& iLiteral)
{
    std::string normalizedLiteral;
    const std::string::size_type iLiteralLen = iLiteral.size();
    std::string::size_type idx = 0;
    // Invariant: at the top of the loop, idx is the first character not consumed yet.
    while (idx < iLiteralLen) {
        if (iLiteral[idx] != '\\') {
            normalizedLiteral.push_back(iLiteral[idx]);
            ++idx;
            continue;
        }

        // Skip the backslash; it must be followed by at least one character
        if (++idx == iLiteralLen)
            throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
        const char escape = iLiteral[idx];

        if (escape == 'x') {
            // Hexadecimal number: skip the marker and interpret the next (at most) two characters
            if (++idx == iLiteralLen)
                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
            std::string tempBuf;
            tempBuf.push_back(iLiteral[idx]);
            ++idx;
            if (idx != iLiteralLen) {
                tempBuf.push_back(iLiteral[idx]);
                ++idx;
            }
            normalizedLiteral.push_back(static_cast<char>(MathLib::toULongNumber("0x" + tempBuf)));
            continue;
        }

        if (escape == 'u' || escape == 'U') {
            // Unicode string; just skip the marker
            ++idx;
            continue;
        }

        if (MathLib::isOctalDigit(escape)) {
            // 1-3 digit octal number. Only the digits themselves are consumed, so the
            // character following a short sequence is handled by the next iteration.
            unsigned int octalValue = 0;
            for (int digits = 0; digits < 3 && idx < iLiteralLen && MathLib::isOctalDigit(iLiteral[idx]); ++digits, ++idx)
                octalValue = octalValue * 8 + static_cast<unsigned int>(iLiteral[idx] - '0');
            normalizedLiteral.push_back(static_cast<char>(octalValue));
            continue;
        }

        // Simple (single character) escape sequence
        switch (escape) {
        case 'a':
            normalizedLiteral.push_back('\a');
            break;
        case 'b':
            normalizedLiteral.push_back('\b');
            break;
        case 'e':
            normalizedLiteral.push_back(0x1B); // clang, gcc, tcc interpret this as 0x1B - escape character
            break;
        case 'f':
            normalizedLiteral.push_back('\f');
            break;
        case 'n':
            normalizedLiteral.push_back('\n');
            break;
        case 'r':
            normalizedLiteral.push_back('\r');
            break;
        case 't':
            normalizedLiteral.push_back('\t');
            break;
        case 'v':
            normalizedLiteral.push_back('\v');
            break;
        case '\\':
        case '\?':
        case '\'':
        case '\"':
            normalizedLiteral.push_back(escape);
            break;
        default:
            throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
        }
        ++idx;
    }
    return normalizedLiteral;
}
MathLib::bigint MathLib::toLongNumber(const std::string & str)

View File

@ -31,6 +31,8 @@
/** @brief Simple math functions that use operands stored in std::string. Useful when performing math on tokens. */
class CPPCHECKLIB MathLib {
friend class TestMathLib;
public:
/** @brief value class */
class value {
@ -120,6 +122,12 @@ public:
* */
static MathLib::bigint characterLiteralToLongNumber(const std::string& str);
private:
/**
* \param iLiteral A character literal
* \return The equivalent character literal with all escapes interpreted
*/
static std::string normalizeCharacterLiteral(const std::string& iLiteral);
};
MathLib::value operator+(const MathLib::value &v1, const MathLib::value &v2);

View File

@ -58,6 +58,7 @@ private:
TEST_CASE(tan);
TEST_CASE(abs);
TEST_CASE(toString);
TEST_CASE(characterLiteralsNormalization);
}
void isGreater() const {
@ -289,6 +290,7 @@ private:
ASSERT_EQUALS((int)('\34'), MathLib::toLongNumber("'\\34'"));
ASSERT_EQUALS((int)('\034'), MathLib::toLongNumber("'\\034'"));
ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'"));
ASSERT_EQUALS((int)('\134t'), MathLib::toLongNumber("'\\134t'")); // Ticket #7452
ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError);
ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError);
// that is not gcc/clang encoding
@ -1119,6 +1121,27 @@ private:
ASSERT_EQUALS("0" , MathLib::toString(+0.0l));
ASSERT_EQUALS("-0" , MathLib::toString(-0.0L));
}
// Exercises MathLib::normalizeCharacterLiteral directly. The inputs below are
// passed without surrounding single quotes.
void characterLiteralsNormalization() {
// `A` is 0x41 and 0101
ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\x41"));
ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\101"));
// Hexadecimal and octal numbers should not only be interpreted in byte 1:
// the escape is placed in the middle and at the end of the input as well
ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\x41TEST"));
ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\101TEST"));
ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\x41"));
ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\101"));
// Single escape sequences
ASSERT_EQUALS("\?" , MathLib::normalizeCharacterLiteral("\\?"));
ASSERT_EQUALS("\'" , MathLib::normalizeCharacterLiteral("\\'"));
// Incomplete sequences: a lone backslash, and a hexadecimal marker without any digit
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\"), InternalError);
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\x"), InternalError);
// No octal digit in an octal sequence
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\9"), InternalError);
// Unsupported single escape sequence
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\c"), InternalError);
}
};
REGISTER_TEST(TestMathLib)