Merge pull request #794 from simartin/ticket_7452

Ticket #7452: Properly interpret escape sequences in character literals.
2016-05-15 20:35:50 +02:00 · 2016-05-15 20:35:50 +02:00 · 22542e7547
parent 2668cee3cf 3af30e728c
commit 22542e7547
3 changed files with 102 additions and 53 deletions
--- a/lib/mathlib.cpp
+++ b/lib/mathlib.cpp
@ -335,15 +335,6 @@ MathLib::biguint MathLib::toULongNumber(const std::string & str)
    return ret;
 }
 // Tells whether str consists solely of characters accepted by MathLib::isOctalDigit.
 // An empty string yields true, since no character fails the check.
 static bool isOctalDigitString(const std::string& str)
 {
    std::string::size_type pos = 0;
    // Advance past every leading character that passes the octal-digit check.
    while (pos != str.size() && MathLib::isOctalDigit(str[pos]))
        ++pos;
    // The whole string passed exactly when the scan reached the end.
    return pos == str.size();
 }
 static unsigned int encodeMultiChar(const std::string& str)
 {
    unsigned int retval(str.front());
@ -357,30 +348,50 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
 {
    if (str.empty())
        return 0; // for unit-testing...
    if (str.size()==1)
        return str[0] & 0xff;
    if (str[0] != '\\') {
        // C99 6.4.4.4
        // The value of an integer character constant containing more than one character (e.g., 'ab'),
        // or containing a character or escape sequence that does not map to a single-byte execution character,
        // is implementation-defined.
        // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
        return encodeMultiChar(str);
    }
    const std::string& str1 = str.substr(1);
-    switch (str1[0]) {
+    // C99 6.4.4.4
-    case 'x':
+    // The value of an integer character constant containing more than one character (e.g., 'ab'),
-        return toLongNumber("0x" + str.substr(2));
+    // or containing a character or escape sequence that does not map to a single-byte execution character,
-    case 'u': // 16-bit unicode character
+    // is implementation-defined.
-        return encodeMultiChar(str1);
+    // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
-    case 'U': // 32-bit unicode character
+    const std::string& normStr = normalizeCharacterLiteral(str);
-        return encodeMultiChar(str1);
+    return encodeMultiChar(normStr);
-    default: {
+}
-        char c;
+
-        switch (str.size()-1) {
+std::string MathLib::normalizeCharacterLiteral(const std::string& iLiteral)
-        case 1:
+{
-            switch (str[1]) {
+    std::string normalizedLiteral;
    const std::string::size_type iLiteralLen = iLiteral.size();
    for (std::string::size_type idx = 0; idx < iLiteralLen ; ++idx) {
        if (iLiteral[idx] != '\\') {
            normalizedLiteral.push_back(iLiteral[idx]);
            continue;
        }
        ++idx;
        if (idx == iLiteralLen) {
            throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
        }
        switch (iLiteral[idx]) {
        case 'x':
            // Hexadecimal number: skip \x and interpret the next one or two characters
            {
                if (++idx == iLiteralLen)
                    throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
                std::string tempBuf;
                tempBuf.push_back(iLiteral[idx]);
                if (++idx != iLiteralLen)
                    tempBuf.push_back(iLiteral[idx]);
                normalizedLiteral.push_back(static_cast<char>(MathLib::toULongNumber("0x" + tempBuf)));
                continue;
            }
        case 'u':
        case 'U':
            // Unicode string; just skip the \u or \U
            continue;
        }
        // Exactly one character left after the backslash: a single octal digit or a simple escape sequence
        if (1 == std::min<unsigned>(3, iLiteralLen - idx)) {
            switch (iLiteral[idx]) {
            case '0':
            case '1':
            case '2':
@ -389,52 +400,59 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
            case '5':
            case '6':
            case '7':
-                return str[1]-'0';
+                normalizedLiteral.push_back(iLiteral[idx]-'0');
                break;
            case 'a':
-                c = '\a';
+                normalizedLiteral.push_back('\a');
                break;
            case 'b':
-                c = '\b';
+                normalizedLiteral.push_back('\b');
                break;
            case 'e':
-                c = 0x1B; // clang, gcc, tcc interpret this as 0x1B - escape character
+                normalizedLiteral.push_back(0x1B); // clang, gcc, tcc interpret this as 0x1B - escape character
                break;
            case 'f':
-                c = '\f';
+                normalizedLiteral.push_back('\f');
                break;
            case 'n':
-                c = '\n';
+                normalizedLiteral.push_back('\n');
                break;
            case 'r':
-                c = '\r';
+                normalizedLiteral.push_back('\r');
                break;
            case 't':
-                c = '\t';
+                normalizedLiteral.push_back('\t');
                break;
            case 'v':
-                c = '\v';
+                normalizedLiteral.push_back('\v');
                break;
            case '\\':
            case '\?':
            case '\'':
            case '\"':
-                c = str[1];
+                normalizedLiteral.push_back(iLiteral[idx]);
                break;
            default:
-                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
+                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
            }
-            return c & 0xff;
+            continue;
        case 2:
        case 3:
            if (isOctalDigitString(str1))
                return toLongNumber("0" + str1);
            break;
        }
        // 2-3 digit octal number
        if (!MathLib::isOctalDigit(iLiteral[idx]))
            throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
        std::string tempBuf;
        tempBuf.push_back(iLiteral[idx]);
        ++idx;
        if (MathLib::isOctalDigit(iLiteral[idx])) {
            tempBuf.push_back(iLiteral[idx]);
            ++idx;
            if (MathLib::isOctalDigit(iLiteral[idx])) {
                tempBuf.push_back(iLiteral[idx]);
            }
        }
        normalizedLiteral.push_back(static_cast<char>(MathLib::toLongNumber("0" + tempBuf)));
    }
-    }
+    return normalizedLiteral;
    throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
 }
 MathLib::bigint MathLib::toLongNumber(const std::string & str)
--- a/lib/mathlib.h
+++ b/lib/mathlib.h
@ -31,6 +31,8 @@
 /** @brief simple math functions that uses operands stored in std::string. useful when performing math on tokens. */
 class CPPCHECKLIB MathLib {
    friend class TestMathLib;
 public:
    /** @brief value class */
    class value {
@ -120,6 +122,12 @@ public:
     * */
    static MathLib::bigint characterLiteralToLongNumber(const std::string& str);
 private:
    /**
     * \brief Interpret the escape sequences contained in a character literal.
     * \param iLiteral A character literal
     * \return The equivalent character literal with all escapes interpreted
     */
    static std::string normalizeCharacterLiteral(const std::string& iLiteral);
 };
 MathLib::value operator+(const MathLib::value &v1, const MathLib::value &v2);
--- a/test/testmathlib.cpp
+++ b/test/testmathlib.cpp
@ -58,6 +58,7 @@ private:
        TEST_CASE(tan);
        TEST_CASE(abs);
        TEST_CASE(toString);
        TEST_CASE(characterLiteralsNormalization);
    }
    void isGreater() const {
@ -289,6 +290,7 @@ private:
        ASSERT_EQUALS((int)('\34'),  MathLib::toLongNumber("'\\34'"));
        ASSERT_EQUALS((int)('\034'), MathLib::toLongNumber("'\\034'"));
        ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'"));
        ASSERT_EQUALS((int)('\134t'), MathLib::toLongNumber("'\\134t'")); // Ticket #7452
        ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError);
        ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError);
        // that is not gcc/clang encoding
@ -1119,6 +1121,27 @@ private:
        ASSERT_EQUALS("0"     , MathLib::toString(+0.0l));
        ASSERT_EQUALS("-0"    , MathLib::toString(-0.0L));
    }
    // Exercises MathLib::normalizeCharacterLiteral: well-formed escape sequences
    // must be replaced by the character they denote, and malformed ones must
    // raise InternalError.
    void characterLiteralsNormalization() {
        // 'A' can be written as hexadecimal 0x41 or octal 0101
        ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\x41"));
        ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\101"));
        // Hexadecimal and octal escapes must be interpreted wherever they occur
        // in the literal (middle or end), not only at its first byte
        ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\x41TEST"));
        ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\101TEST"));
        ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\x41"));
        ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\101"));
        // Simple escape sequences: an escaped question mark and an escaped single quote
        ASSERT_EQUALS("\?" , MathLib::normalizeCharacterLiteral("\\?"));
        ASSERT_EQUALS("\'" , MathLib::normalizeCharacterLiteral("\\'"));
        // Truncated sequences: a lone backslash, and a hexadecimal prefix with no digits after it
        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\"), InternalError);
        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\x"), InternalError);
        // A backslash followed by the digit 9, which is not an octal digit
        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\9"), InternalError);
        // A backslash followed by a letter that is not a supported escape
        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\c"), InternalError);
    }
 };
 REGISTER_TEST(TestMathLib)