Ticket #7452: Properly interpret escape sequences in character literals.

2016-05-15 15:48:24 +02:00 · 2016-05-15 15:48:24 +02:00 · 3af30e728c
commit 3af30e728c
parent 9d1302d523
3 changed files with 102 additions and 53 deletions
--- a/lib/mathlib.cpp
+++ b/lib/mathlib.cpp
@ -335,15 +335,6 @@ MathLib::biguint MathLib::toULongNumber(const std::string & str)
    return ret;
 }

-static bool isOctalDigitString(const std::string& str)
-{
-    for (std::string::const_iterator it=str.begin(); it!=str.end(); ++it) {
-        if (!MathLib::isOctalDigit(*it))
-            return false;
-    }
-    return true;
-}
-
 static unsigned int encodeMultiChar(const std::string& str)
 {
    unsigned int retval(str.front());
@ -357,30 +348,50 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
 {
    if (str.empty())
        return 0; // for unit-testing...
-    if (str.size()==1)
-        return str[0] & 0xff;
-    if (str[0] != '\\') {
-        // C99 6.4.4.4
-        // The value of an integer character constant containing more than one character (e.g., 'ab'),
-        // or containing a character or escape sequence that does not map to a single-byte execution character,
-        // is implementation-defined.
-        // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
-        return encodeMultiChar(str);
-    }
-    const std::string& str1 = str.substr(1);

-    switch (str1[0]) {
-    case 'x':
-        return toLongNumber("0x" + str.substr(2));
-    case 'u': // 16-bit unicode character
-        return encodeMultiChar(str1);
-    case 'U': // 32-bit unicode character
-        return encodeMultiChar(str1);
-    default: {
-        char c;
-        switch (str.size()-1) {
-        case 1:
-            switch (str[1]) {
+    // C99 6.4.4.4
+    // The value of an integer character constant containing more than one character (e.g., 'ab'),
+    // or containing a character or escape sequence that does not map to a single-byte execution character,
+    // is implementation-defined.
+    // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
+    const std::string& normStr = normalizeCharacterLiteral(str);
+    return encodeMultiChar(normStr);
+}
+
+std::string MathLib::normalizeCharacterLiteral(const std::string& iLiteral)
+{
+    std::string normalizedLiteral;
+    const std::string::size_type iLiteralLen = iLiteral.size();
+    for (std::string::size_type idx = 0; idx < iLiteralLen ; ++idx) {
+        if (iLiteral[idx] != '\\') {
+            normalizedLiteral.push_back(iLiteral[idx]);
+            continue;
+        }
+        ++idx;
+        if (idx == iLiteralLen) {
+            throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
+        }
+        switch (iLiteral[idx]) {
+        case 'x':
+            // Hexadecimal number: skip \x and interpret the next one or two characters
+            {
+                if (++idx == iLiteralLen)
+                    throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
+                std::string tempBuf;
+                tempBuf.push_back(iLiteral[idx]);
+                if (++idx != iLiteralLen)
+                    tempBuf.push_back(iLiteral[idx]);
+                normalizedLiteral.push_back(static_cast<char>(MathLib::toULongNumber("0x" + tempBuf)));
+                continue;
+            }
+        case 'u':
+        case 'U':
+            // Unicode string; just skip the \u or \U
+            continue;
+        }
+        // Escape character is the last character: single octal digit or simple escape (\a, \n, \\, ...)
+        if (1 == std::min<unsigned>(3, iLiteralLen - idx)) {
+            switch (iLiteral[idx]) {
            case '0':
            case '1':
            case '2':
@ -389,52 +400,59 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
            case '5':
            case '6':
            case '7':
-                return str[1]-'0';
+                normalizedLiteral.push_back(iLiteral[idx]-'0');
+                break;
            case 'a':
-                c = '\a';
+                normalizedLiteral.push_back('\a');
                break;
            case 'b':
-                c = '\b';
+                normalizedLiteral.push_back('\b');
                break;
            case 'e':
-                c = 0x1B; // clang, gcc, tcc interpret this as 0x1B - escape character
+                normalizedLiteral.push_back(0x1B); // clang, gcc, tcc interpret this as 0x1B - escape character
                break;
            case 'f':
-                c = '\f';
+                normalizedLiteral.push_back('\f');
                break;
            case 'n':
-                c = '\n';
+                normalizedLiteral.push_back('\n');
                break;
            case 'r':
-                c = '\r';
+                normalizedLiteral.push_back('\r');
                break;
            case 't':
-                c = '\t';
+                normalizedLiteral.push_back('\t');
                break;
            case 'v':
-                c = '\v';
+                normalizedLiteral.push_back('\v');
                break;
            case '\\':
            case '\?':
            case '\'':
            case '\"':
-                c = str[1];
+                normalizedLiteral.push_back(iLiteral[idx]);
                break;
            default:
-                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
+                throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
            }
-            return c & 0xff;
-        case 2:
-        case 3:
-            if (isOctalDigitString(str1))
-                return toLongNumber("0" + str1);
-            break;
-
+            continue;
        }
+        // 2-3 digit octal number
+        if (!MathLib::isOctalDigit(iLiteral[idx]))
+            throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
+        std::string tempBuf;
+        tempBuf.push_back(iLiteral[idx]);
+        ++idx;
+        if (MathLib::isOctalDigit(iLiteral[idx])) {
+            tempBuf.push_back(iLiteral[idx]);
+            ++idx;
+            if (MathLib::isOctalDigit(iLiteral[idx])) {
+                tempBuf.push_back(iLiteral[idx]);
+            }
+        }
+        normalizedLiteral.push_back(static_cast<char>(MathLib::toLongNumber("0" + tempBuf)));
    }
-    }
-
-    throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
+    return normalizedLiteral;
 }

 MathLib::bigint MathLib::toLongNumber(const std::string & str)
--- a/lib/mathlib.h
+++ b/lib/mathlib.h
@ -31,6 +31,8 @@
 /** @brief simple math functions that uses operands stored in std::string. useful when performing math on tokens. */

 class CPPCHECKLIB MathLib {
+    friend class TestMathLib;
+
 public:
    /** @brief value class */
    class value {
@ -120,6 +122,12 @@ public:
     * */
    static MathLib::bigint characterLiteralToLongNumber(const std::string& str);

+private:
+    /*
+     * \param iLiteral A character literal
+     * \return The equivalent character literal with all escapes interpreted
+     */
+    static std::string normalizeCharacterLiteral(const std::string& iLiteral);
 };

 MathLib::value operator+(const MathLib::value &v1, const MathLib::value &v2);
--- a/test/testmathlib.cpp
+++ b/test/testmathlib.cpp
@ -58,6 +58,7 @@ private:
        TEST_CASE(tan);
        TEST_CASE(abs);
        TEST_CASE(toString);
+        TEST_CASE(characterLiteralsNormalization);
    }

    void isGreater() const {
@ -289,6 +290,7 @@ private:
        ASSERT_EQUALS((int)('\34'),  MathLib::toLongNumber("'\\34'"));
        ASSERT_EQUALS((int)('\034'), MathLib::toLongNumber("'\\034'"));
        ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'"));
+        ASSERT_EQUALS((int)('\134t'), MathLib::toLongNumber("'\\134t'")); // Ticket #7452
        ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError);
        ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError);
        // that is not gcc/clang encoding
@ -1119,6 +1121,27 @@ private:
        ASSERT_EQUALS("0"     , MathLib::toString(+0.0l));
        ASSERT_EQUALS("-0"    , MathLib::toString(-0.0L));
    }
+
+    void characterLiteralsNormalization() {
+        // `A` is 0x41 and 0101
+        ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\x41"));
+        ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\101"));
+        // Hexadecimal and octal escapes must be interpreted at any position, not only in the first byte
+        ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\x41TEST"));
+        ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\101TEST"));
+        ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\x41"));
+        ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\101"));
+        // Single escape sequences
+        ASSERT_EQUALS("\?" , MathLib::normalizeCharacterLiteral("\\?"));
+        ASSERT_EQUALS("\'" , MathLib::normalizeCharacterLiteral("\\'"));
+        // Incomplete escape and hexadecimal sequences
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\"), InternalError);
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\x"), InternalError);
+        // No octal digit in an octal sequence
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\9"), InternalError);
+        // Unsupported single escape sequence
+        ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\c"), InternalError);
+    }
 };

 REGISTER_TEST(TestMathLib)