Handle concatenated string and char literals

This handles concatenated string and character literals coming from simplecpp. Previously, L'c' would be preprocessed into the two tokens "L" and "'c'"; cppcheck would then remove the "L" token and mark "'c'" as a wide character literal. Now that simplecpp keeps the prefix attached to the literal, cppcheck needs to strip the prefix from the token instead. While doing this, also add handling of UTF-32 encoded literals (U) and UTF-8 encoded literals (u8).
2019-03-10 10:38:50 +01:00 · 2019-03-10 10:38:50 +01:00 · 6a3dd9a185
parent 93194f47a1
commit 6a3dd9a185
4 changed files with 60 additions and 13 deletions
--- a/lib/token.cpp
+++ b/lib/token.cpp
@ -34,6 +34,19 @@
 #include <stack>
 #include <utility>

+/**
+ * Check whether a token string is a string or character literal.
+ *
+ * A literal is an optional encoding prefix (u8, L, U or u), immediately
+ * followed by the opening quote q, and terminated by a closing quote q.
+ *
+ * @param str token string to classify
+ * @param q   quote character delimiting the literal ('"' or '\'')
+ * @return true if str has the shape [prefix] q ... q
+ */
+static bool isStringCharLiteral(const std::string &str, char q)
+{
+    if (!endsWith(str, q))
+        return false;
+
+    // The empty prefix covers plain literals such as "abc" and 'c'.
+    const std::string prefix[5] = { "", "u8", "L", "U", "u" };
+    for (const std::string & p: prefix) {
+        // The opening and closing quotes must be two distinct characters, so
+        // the token needs at least prefix + 2 characters. Without this a lone
+        // quote (or e.g. u8") would be accepted as a literal.
+        if (str.length() < p.length() + 2)
+            continue;
+        // Anchored comparison: only the start of the token is inspected.
+        if (str.compare(0, p.length() + 1, p + q) == 0)
+            return true;
+    }
+    return false;
+}
 const std::list<ValueFlow::Value> TokenImpl::mEmptyValueList;

 Token::Token(TokensFrontBack *tokensFrontBack) :
@ -73,6 +86,10 @@ void Token::update_property_info()
    if (!mStr.empty()) {
        if (mStr == "true" || mStr == "false")
            tokType(eBoolean);
+        else if (isStringCharLiteral(mStr, '\"'))
+            tokType(eString);
+        else if (isStringCharLiteral(mStr, '\''))
+            tokType(eChar);
        else if (std::isalpha((unsigned char)mStr[0]) || mStr[0] == '_' || mStr[0] == '$') { // Name
            if (mImpl->mVarId)
                tokType(eVariable);
@ -80,10 +97,6 @@ void Token::update_property_info()
                tokType(eName);
        } else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1])))
            tokType(eNumber);
-        else if (mStr.length() > 1 && mStr[0] == '"' && endsWith(mStr,'"'))
-            tokType(eString);
-        else if (mStr.length() > 1 && mStr[0] == '\'' && endsWith(mStr,'\''))
-            tokType(eChar);
        else if (mStr == "=" || mStr == "<<=" || mStr == ">>=" ||
                 (mStr.size() == 2U && mStr[1] == '=' && std::strchr("+-*/%&^|", mStr[0])))
            tokType(eAssignmentOp);
--- a/lib/tokenize.cpp
+++ b/lib/tokenize.cpp
@ -1926,13 +1926,15 @@ void Tokenizer::combineOperators()

 void Tokenizer::combineStringAndCharLiterals()
 {
-    // Combine wide strings and wide characters
    for (Token *tok = list.front(); tok; tok = tok->next()) {
-        if (Token::Match(tok, "[Lu] %char%|%str%")) {
-            // Combine 'L "string"' and 'L 'c''
-            tok->str(tok->next()->str());
-            tok->deleteNext();
-            tok->isLong(true);
+        const std::string prefix[4] = {"u8", "L", "U", "u"};
+        for (const std::string & p : prefix) {
+            if (((tok->tokType() == Token::eString) && (tok->str().find(p + "\"") == 0)) ||
+                ((tok->tokType() == Token::eChar) && (tok->str().find(p + "\'") == 0))) {
+                tok->str(tok->str().substr(p.size()));
+                tok->isLong(p != "u8");
+                break;
+            }
        }
    }

--- a/test/testsimplifytokens.cpp
+++ b/test/testsimplifytokens.cpp
@ -115,6 +115,8 @@ private:

        TEST_CASE(combine_wstrings);
        TEST_CASE(combine_ustrings);
+        TEST_CASE(combine_Ustrings);
+        TEST_CASE(combine_u8strings);

        // Simplify "not" to "!" (#345)
        TEST_CASE(not1);
@ -507,9 +509,9 @@ private:
    }

    void combine_ustrings() {
-        const char code[] =  "abc = u\"abc\";";
+        const char code[] =  "abcd = u\"ab\" u\"cd\";";

-        const char expected[] =  "abc = \"abc\" ;";
+        const char expected[] =  "abcd = \"abcd\" ;";

        Tokenizer tokenizer(&settings0, this);
        std::istringstream istr(code);
@ -519,6 +521,33 @@ private:
        ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
    }

+    // Adjacent U"..." literals: expect one merged, prefix-less string token flagged as long.
+    void combine_Ustrings() {
+        const char source[] = "abcd = U\"ab\" U\"cd\";";
+        const char wanted[] = "abcd = \"abcd\" ;";

+        std::istringstream input(source);
+        Tokenizer tokenizer(&settings0, this);
+        tokenizer.tokenize(input, "test.cpp");

+        // The literal is the third token: abcd, =, "abcd"
+        const Token * const first = tokenizer.tokens();
+        ASSERT_EQUALS(wanted, first->stringifyList(0, false));
+        ASSERT_EQUALS(true, first->tokAt(2)->isLong());
+    }
+
+    // Adjacent u8"..." literals: expect one merged, prefix-less string token that is NOT flagged as long.
+    void combine_u8strings() {
+        const char source[] = "abcd = u8\"ab\" u8\"cd\";";
+        const char wanted[] = "abcd = \"abcd\" ;";

+        std::istringstream input(source);
+        Tokenizer tokenizer(&settings0, this);
+        tokenizer.tokenize(input, "test.cpp");

+        // The literal is the third token: abcd, =, "abcd"
+        const Token * const first = tokenizer.tokens();
+        ASSERT_EQUALS(wanted, first->stringifyList(0, false));
+        ASSERT_EQUALS(false, first->tokAt(2)->isLong());
+    }
+
    void double_plus() {
        {
            const char code1[] =  "void foo( int a )\n"
--- a/test/testtokenize.cpp
+++ b/test/testtokenize.cpp
@ -8210,6 +8210,7 @@ private:
        if (!tokenList.list.createTokens(istr,"test.cpp"))
            return "ERROR";

+        tokenList.combineStringAndCharLiterals();
        tokenList.combineOperators();
        tokenList.createLinks();
        tokenList.createLinks2();
@ -8295,9 +8296,11 @@ private:

        ASSERT_EQUALS("a\"\"=", testAst("a=\"\""));
        ASSERT_EQUALS("a\'\'=", testAst("a=\'\'"));
-
        ASSERT_EQUALS("'X''a'>", testAst("('X' > 'a')"));
        ASSERT_EQUALS("'X''a'>", testAst("(L'X' > L'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(u'X' > u'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(U'X' > U'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(u8'X' > u8'a')"));

        ASSERT_EQUALS("a0>bc/d:?", testAst("(a>0) ? (b/(c)) : d;"));
        ASSERT_EQUALS("abc/+d+", testAst("a + (b/(c)) + d;"));