MathLib: interpret escape sequences at any position of a character literal

characterLiteralToLongNumber() passes the literal through the new private
helper normalizeCharacterLiteral(), which replaces every escape sequence by
the character it denotes, and encodes the result with encodeMultiChar().
TestMathLib is made a friend of MathLib so the helper can be tested directly.

NOTE(review): the hunk layout was rebuilt from a copy of this patch whose
line breaks had been lost; indentation and the position of blank context
lines are best-effort and should be checked against the tree.
NOTE(review): the blob ids on the "index" lines were not regenerated.

diff --git a/lib/mathlib.cpp b/lib/mathlib.cpp
index 3992345b6..4773b8c20 100644
--- a/lib/mathlib.cpp
+++ b/lib/mathlib.cpp
@@ -335,15 +335,6 @@ MathLib::biguint MathLib::toULongNumber(const std::string & str)
     return ret;
 }
 
-static bool isOctalDigitString(const std::string& str)
-{
-    for (std::string::const_iterator it=str.begin(); it!=str.end(); ++it) {
-        if (!MathLib::isOctalDigit(*it))
-            return false;
-    }
-    return true;
-}
-
 static unsigned int encodeMultiChar(const std::string& str)
 {
     unsigned int retval(str.front());
@@ -357,30 +348,53 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
 {
     if (str.empty())
         return 0; // for unit-testing...
-    if (str.size()==1)
-        return str[0] & 0xff;
-    if (str[0] != '\\') {
-        // C99 6.4.4.4
-        // The value of an integer character constant containing more than one character (e.g., 'ab'),
-        // or containing a character or escape sequence that does not map to a single-byte execution character,
-        // is implementation-defined.
-        // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
-        return encodeMultiChar(str);
-    }
 
-    const std::string& str1 = str.substr(1);
-    switch (str1[0]) {
-    case 'x':
-        return toLongNumber("0x" + str.substr(2));
-    case 'u': // 16-bit unicode character
-        return encodeMultiChar(str1);
-    case 'U': // 32-bit unicode character
-        return encodeMultiChar(str1);
-    default: {
-        char c;
-        switch (str.size()-1) {
-        case 1:
-            switch (str[1]) {
+    // C99 6.4.4.4
+    // The value of an integer character constant containing more than one character (e.g., 'ab'),
+    // or containing a character or escape sequence that does not map to a single-byte execution character,
+    // is implementation-defined.
+    // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
+    const std::string& normStr = normalizeCharacterLiteral(str);
+    return encodeMultiChar(normStr);
+}
+
+std::string MathLib::normalizeCharacterLiteral(const std::string& iLiteral)
+{
+    std::string normalizedLiteral;
+    const std::string::size_type iLiteralLen = iLiteral.size();
+    for (std::string::size_type idx = 0; idx < iLiteralLen ; ++idx) {
+        if (iLiteral[idx] != '\\') {
+            normalizedLiteral.push_back(iLiteral[idx]);
+            continue;
+        }
+        ++idx;
+        if (idx == iLiteralLen) {
+            throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
+        }
+        switch (iLiteral[idx]) {
+        case 'x':
+            // Hexadecimal number: skip \x and interpret the next two characters
+        {
+            if (++idx == iLiteralLen)
+                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
+            std::string tempBuf;
+            tempBuf.push_back(iLiteral[idx]);
+            if (++idx != iLiteralLen)
+                tempBuf.push_back(iLiteral[idx]);
+            normalizedLiteral.push_back(static_cast<char>(MathLib::toULongNumber("0x" + tempBuf)));
+            continue;
+        }
+        case 'u':
+        case 'U':
+            // Unicode string; just skip the \u or \U
+            // A bare prefix would leave an empty string for encodeMultiChar(), which reads str.front()
+            if (idx + 1 == iLiteralLen)
+                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
+            continue;
+        }
+        // Last character of the literal, or not an octal digit: single-character escape
+        if (idx + 1 == iLiteralLen || !MathLib::isOctalDigit(iLiteral[idx])) {
+            switch (iLiteral[idx]) {
             case '0':
             case '1':
             case '2':
@@ -389,52 +403,53 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
             case '5':
             case '6':
             case '7':
-                return str[1]-'0';
+                normalizedLiteral.push_back(iLiteral[idx]-'0');
+                break;
             case 'a':
-                c = '\a';
+                normalizedLiteral.push_back('\a');
                 break;
             case 'b':
-                c = '\b';
+                normalizedLiteral.push_back('\b');
                 break;
             case 'e':
-                c = 0x1B; // clang, gcc, tcc interpret this as 0x1B - escape character
+                normalizedLiteral.push_back(0x1B); // clang, gcc, tcc interpret this as 0x1B - escape character
                 break;
             case 'f':
-                c = '\f';
+                normalizedLiteral.push_back('\f');
                 break;
             case 'n':
-                c = '\n';
+                normalizedLiteral.push_back('\n');
                 break;
             case 'r':
-                c = '\r';
+                normalizedLiteral.push_back('\r');
                 break;
             case 't':
-                c = '\t';
+                normalizedLiteral.push_back('\t');
                 break;
             case 'v':
-                c = '\v';
+                normalizedLiteral.push_back('\v');
                 break;
             case '\\':
             case '\?':
             case '\'':
             case '\"':
-                c = str[1];
+                normalizedLiteral.push_back(iLiteral[idx]);
                 break;
             default:
-                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
+                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
             }
-            return c & 0xff;
-        case 2:
-        case 3:
-            if (isOctalDigitString(str1))
-                return toLongNumber("0" + str1);
-            break;
-
+            continue;
         }
+        // Octal number of one to three digits
+        std::string tempBuf;
+        tempBuf.push_back(iLiteral[idx]);
+        // Take at most two further octal digits. idx stays on the last digit consumed,
+        // because the for-loop increment is what steps past the escape sequence.
+        while (tempBuf.size() < 3 && idx + 1 < iLiteralLen && MathLib::isOctalDigit(iLiteral[idx + 1]))
+            tempBuf.push_back(iLiteral[++idx]);
+        normalizedLiteral.push_back(static_cast<char>(MathLib::toLongNumber("0" + tempBuf)));
     }
-    }
-
-    throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
+    return normalizedLiteral;
 }
 
 MathLib::bigint MathLib::toLongNumber(const std::string & str)
diff --git a/lib/mathlib.h b/lib/mathlib.h
index 14decf4a5..067eb639c 100644
--- a/lib/mathlib.h
+++ b/lib/mathlib.h
@@ -31,6 +31,8 @@
 
 /** @brief simple math functions that uses operands stored in std::string. useful when performing math on tokens. */
 class CPPCHECKLIB MathLib {
+    friend class TestMathLib;
+
 public:
     /** @brief value class */
     class value {
@@ -120,6 +122,13 @@ public:
      *
      */
     static MathLib::bigint characterLiteralToLongNumber(const std::string& str);
+private:
+    /**
+     * \param iLiteral A character literal
+     * \return The equivalent character literal with all escapes interpreted
+     * \throw InternalError if an escape sequence is incomplete or not supported
+     */
+    static std::string normalizeCharacterLiteral(const std::string& iLiteral);
 };
 
 MathLib::value operator+(const MathLib::value &v1, const MathLib::value &v2);
diff --git a/test/testmathlib.cpp b/test/testmathlib.cpp
index 9d1a5695d..8cc7258d1 100644
--- a/test/testmathlib.cpp
+++ b/test/testmathlib.cpp
@@ -58,6 +58,7 @@ private:
         TEST_CASE(tan);
         TEST_CASE(abs);
         TEST_CASE(toString);
+        TEST_CASE(characterLiteralsNormalization);
     }
 
     void isGreater() const {
@@ -289,6 +290,7 @@ private:
         ASSERT_EQUALS((int)('\34'), MathLib::toLongNumber("'\\34'"));
         ASSERT_EQUALS((int)('\034'), MathLib::toLongNumber("'\\034'"));
         ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'"));
+        ASSERT_EQUALS((int)('\134t'), MathLib::toLongNumber("'\\134t'")); // Ticket #7452
         ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError);
         ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError);
         // that is not gcc/clang encoding
@@ -1119,6 +1121,34 @@ private:
         ASSERT_EQUALS("0" , MathLib::toString(+0.0l));
         ASSERT_EQUALS("-0" , MathLib::toString(-0.0L));
     }
+
+    void characterLiteralsNormalization() {
+        // `A` is 0x41 and 0101
+        ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\x41"));
+        ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\101"));
+        // Hexa and octal numbers should not only be interpreted in byte 1
+        ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\x41TEST"));
+        ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\101TEST"));
+        ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\x41"));
+        ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\101"));
+        // Single escape sequences
+        ASSERT_EQUALS("\?" , MathLib::normalizeCharacterLiteral("\\?"));
+        ASSERT_EQUALS("\'" , MathLib::normalizeCharacterLiteral("\\'"));
+        // Incomplete hexa and octal sequences
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\"), InternalError);
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\x"), InternalError);
+        // No octal digit in an octal sequence
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\9"), InternalError);
+        // Unsupported single escape sequence
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\c"), InternalError);
+        // The character following a one or two digit octal sequence must be kept
+        ASSERT_EQUALS("\nt" , MathLib::normalizeCharacterLiteral("\\12t"));
+        ASSERT_EQUALS("\1t" , MathLib::normalizeCharacterLiteral("\\1t"));
+        // Single escape sequences are not restricted to the last position
+        ASSERT_EQUALS("\n\t" , MathLib::normalizeCharacterLiteral("\\n\\t"));
+        // Unicode prefix with nothing after it
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\u"), InternalError);
+    }
 };
 
 REGISTER_TEST(TestMathLib)