Merge pull request #794 from simartin/ticket_7452

Ticket #7452: Properly interpret escape sequences in character literals.
This commit is contained in:
amai2012 2016-05-15 20:35:50 +02:00
commit 22542e7547
3 changed files with 102 additions and 53 deletions

View File

@ -335,15 +335,6 @@ MathLib::biguint MathLib::toULongNumber(const std::string & str)
return ret; return ret;
} }
/** @brief Tell whether every character of the given string is an octal digit (an empty string yields true). */
static bool isOctalDigitString(const std::string& str)
{
    const std::string::size_type len = str.size();
    std::string::size_type pos = 0;
    // Advance past the leading run of octal digits; stop at the first offender
    while (pos < len && MathLib::isOctalDigit(str[pos]))
        ++pos;
    // The whole string is octal exactly when the scan consumed all of it
    return pos == len;
}
static unsigned int encodeMultiChar(const std::string& str) static unsigned int encodeMultiChar(const std::string& str)
{ {
unsigned int retval(str.front()); unsigned int retval(str.front());
@ -357,30 +348,50 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
{ {
if (str.empty()) if (str.empty())
return 0; // for unit-testing... return 0; // for unit-testing...
if (str.size()==1)
return str[0] & 0xff;
if (str[0] != '\\') {
// C99 6.4.4.4 // C99 6.4.4.4
// The value of an integer character constant containing more than one character (e.g., 'ab'), // The value of an integer character constant containing more than one character (e.g., 'ab'),
// or containing a character or escape sequence that does not map to a single-byte execution character, // or containing a character or escape sequence that does not map to a single-byte execution character,
// is implementation-defined. // is implementation-defined.
// clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B') // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
return encodeMultiChar(str); const std::string& normStr = normalizeCharacterLiteral(str);
} return encodeMultiChar(normStr);
const std::string& str1 = str.substr(1); }
switch (str1[0]) { std::string MathLib::normalizeCharacterLiteral(const std::string& iLiteral)
{
std::string normalizedLiteral;
const std::string::size_type iLiteralLen = iLiteral.size();
for (std::string::size_type idx = 0; idx < iLiteralLen ; ++idx) {
if (iLiteral[idx] != '\\') {
normalizedLiteral.push_back(iLiteral[idx]);
continue;
}
++idx;
if (idx == iLiteralLen) {
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
}
switch (iLiteral[idx]) {
case 'x': case 'x':
return toLongNumber("0x" + str.substr(2)); // Hexa-decimal number: skip \x and interpret the next two characters
case 'u': // 16-bit unicode character {
return encodeMultiChar(str1); if (++idx == iLiteralLen)
case 'U': // 32-bit unicode character throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
return encodeMultiChar(str1); std::string tempBuf;
default: { tempBuf.push_back(iLiteral[idx]);
char c; if (++idx != iLiteralLen)
switch (str.size()-1) { tempBuf.push_back(iLiteral[idx]);
case 1: normalizedLiteral.push_back(static_cast<char>(MathLib::toULongNumber("0x" + tempBuf)));
switch (str[1]) { continue;
}
case 'u':
case 'U':
// Unicode string; just skip the \u or \U
continue;
}
// Single digit octal number
if (1 == std::min<unsigned>(3, iLiteralLen - idx)) {
switch (iLiteral[idx]) {
case '0': case '0':
case '1': case '1':
case '2': case '2':
@ -389,52 +400,59 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
case '5': case '5':
case '6': case '6':
case '7': case '7':
return str[1]-'0'; normalizedLiteral.push_back(iLiteral[idx]-'0');
break;
case 'a': case 'a':
c = '\a'; normalizedLiteral.push_back('\a');
break; break;
case 'b': case 'b':
c = '\b'; normalizedLiteral.push_back('\b');
break; break;
case 'e': case 'e':
c = 0x1B; // clang, gcc, tcc interpret this as 0x1B - escape character normalizedLiteral.push_back(0x1B); // clang, gcc, tcc interpret this as 0x1B - escape character
break; break;
case 'f': case 'f':
c = '\f'; normalizedLiteral.push_back('\f');
break; break;
case 'n': case 'n':
c = '\n'; normalizedLiteral.push_back('\n');
break; break;
case 'r': case 'r':
c = '\r'; normalizedLiteral.push_back('\r');
break; break;
case 't': case 't':
c = '\t'; normalizedLiteral.push_back('\t');
break; break;
case 'v': case 'v':
c = '\v'; normalizedLiteral.push_back('\v');
break; break;
case '\\': case '\\':
case '\?': case '\?':
case '\'': case '\'':
case '\"': case '\"':
c = str[1]; normalizedLiteral.push_back(iLiteral[idx]);
break; break;
default: default:
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'."); throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
} }
return c & 0xff; continue;
case 2: }
case 3: // 2-3 digit octal number
if (isOctalDigitString(str1)) if (!MathLib::isOctalDigit(iLiteral[idx]))
return toLongNumber("0" + str1); throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
break; std::string tempBuf;
tempBuf.push_back(iLiteral[idx]);
++idx;
if (MathLib::isOctalDigit(iLiteral[idx])) {
tempBuf.push_back(iLiteral[idx]);
++idx;
if (MathLib::isOctalDigit(iLiteral[idx])) {
tempBuf.push_back(iLiteral[idx]);
} }
} }
normalizedLiteral.push_back(static_cast<char>(MathLib::toLongNumber("0" + tempBuf)));
} }
return normalizedLiteral;
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
} }
MathLib::bigint MathLib::toLongNumber(const std::string & str) MathLib::bigint MathLib::toLongNumber(const std::string & str)

View File

@ -31,6 +31,8 @@
/** @brief simple math functions that uses operands stored in std::string. useful when performing math on tokens. */ /** @brief simple math functions that uses operands stored in std::string. useful when performing math on tokens. */
class CPPCHECKLIB MathLib { class CPPCHECKLIB MathLib {
friend class TestMathLib;
public: public:
/** @brief value class */ /** @brief value class */
class value { class value {
@ -120,6 +122,12 @@ public:
* */ * */
static MathLib::bigint characterLiteralToLongNumber(const std::string& str); static MathLib::bigint characterLiteralToLongNumber(const std::string& str);
private:
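/**
* @brief Interpret the escape sequences contained in a character literal
* \param iLiteral A character literal
* \return The equivalent character literal with all escapes interpreted
* \throw InternalError when an escape sequence is incomplete or not supported
*/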
static std::string normalizeCharacterLiteral(const std::string& iLiteral);
}; };
MathLib::value operator+(const MathLib::value &v1, const MathLib::value &v2); MathLib::value operator+(const MathLib::value &v1, const MathLib::value &v2);

View File

@ -58,6 +58,7 @@ private:
TEST_CASE(tan); TEST_CASE(tan);
TEST_CASE(abs); TEST_CASE(abs);
TEST_CASE(toString); TEST_CASE(toString);
TEST_CASE(characterLiteralsNormalization);
} }
void isGreater() const { void isGreater() const {
@ -289,6 +290,7 @@ private:
ASSERT_EQUALS((int)('\34'), MathLib::toLongNumber("'\\34'")); ASSERT_EQUALS((int)('\34'), MathLib::toLongNumber("'\\34'"));
ASSERT_EQUALS((int)('\034'), MathLib::toLongNumber("'\\034'")); ASSERT_EQUALS((int)('\034'), MathLib::toLongNumber("'\\034'"));
ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'")); ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'"));
ASSERT_EQUALS((int)('\134t'), MathLib::toLongNumber("'\\134t'")); // Ticket #7452
ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError); ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError);
ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError); ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError);
// that is not gcc/clang encoding // that is not gcc/clang encoding
@ -1119,6 +1121,27 @@ private:
ASSERT_EQUALS("0" , MathLib::toString(+0.0l)); ASSERT_EQUALS("0" , MathLib::toString(+0.0l));
ASSERT_EQUALS("-0" , MathLib::toString(-0.0L)); ASSERT_EQUALS("-0" , MathLib::toString(-0.0L));
} }
/**
 * Exercise MathLib::normalizeCharacterLiteral(): hexadecimal and octal escapes
 * must be replaced by the character they denote at the start, in the middle and
 * at the end of the literal; the single escapes for '?' and the single quote
 * must yield the escaped character; incomplete or unsupported escapes must
 * raise InternalError.
 */
void characterLiteralsNormalization() {
// `A` is 0x41 and 0101
ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\x41"));
ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\101"));
// Hexa and octal numbers should not only be interpreted in byte 1
ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\x41TEST"));
ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\101TEST"));
ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\x41"));
ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\101"));
// Single escape sequences
ASSERT_EQUALS("\?" , MathLib::normalizeCharacterLiteral("\\?"));
ASSERT_EQUALS("\'" , MathLib::normalizeCharacterLiteral("\\'"));
// Incomplete hexa and octal sequences
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\"), InternalError);
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\x"), InternalError);
// No octal digit in an octal sequence
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\9"), InternalError);
// Unsupported single escape sequence
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\c"), InternalError);
}
}; };
REGISTER_TEST(TestMathLib) REGISTER_TEST(TestMathLib)