Token::ConcatStr(): Handle mixed string literals (#2337)

Improve handling of adjacent string literals of different types. Example of adjacent string literals: "ab" L"cd". In C89, C++98 and C++03, this is undefined. As of C99 and C++11, this is well defined and the two string literals are concatenated to L"abcd". C11 and C++11 introduce the utf16, utf32 and (C++ only) utf8 string types. Concatenating any of these with a regular C string literal works exactly as in the wide string example above. The result of having two adjacent string literals with different prefixes is implementation defined, unless one is a UTF-8 string literal and the other is a wide string literal. In that case the behaviour is undefined. Ignore the undefined and ill-formed programs (this behaviour is unchanged) and make sure that concatenating a plain C string literal with a prefixed one works correctly (in C99 and C++11 and later versions). This change also makes the behaviour consistent, since previously "ab" L"cd" would result in "abcd" while L"ab" "cd" would result in L"abcd". It also means that the somewhat awkward updatePropertiesConcatStr() test can be removed, since the added tests would not work if update_property_info() was not called in concatStr(). Since the prefix is stored in the token, testing the type of the string is not relevant in TestSimplifyTokens. It is tested extensively in TestToken::stringTypes().
2019-11-08 08:03:45 +01:00 · 2019-11-08 08:03:45 +01:00 · 1fcbd696be
parent 650408a210
commit 1fcbd696be
3 changed files with 60 additions and 22 deletions
--- a/lib/token.cpp
+++ b/lib/token.cpp
@ -175,6 +175,9 @@ void Token::concatStr(std::string const& b)
    mStr.erase(mStr.length() - 1);
    mStr.append(getStringLiteral(b) + "\"");

+    if (isCChar() && isStringLiteral(b) && b[0] != '"') {
+        mStr.insert(0, b.substr(0, b.find('"')));
+    }
    update_property_info();
 }

--- a/test/testsimplifytokens.cpp
+++ b/test/testsimplifytokens.cpp
@ -90,7 +90,14 @@ private:

        TEST_CASE(cast);
        TEST_CASE(iftruefalse);
+
        TEST_CASE(combine_strings);
+        TEST_CASE(combine_wstrings);
+        TEST_CASE(combine_ustrings);
+        TEST_CASE(combine_Ustrings);
+        TEST_CASE(combine_u8strings);
+        TEST_CASE(combine_mixedstrings);
+
        TEST_CASE(double_plus);
        TEST_CASE(redundant_plus);
        TEST_CASE(redundant_plus_numbers);
@ -143,11 +150,6 @@ private:
        TEST_CASE(doWhileAssign); // varid
        TEST_CASE(test_4881); // similar to doWhileAssign (#4911), taken from #4881 with full code

-        TEST_CASE(combine_wstrings);
-        TEST_CASE(combine_ustrings);
-        TEST_CASE(combine_Ustrings);
-        TEST_CASE(combine_u8strings);
-
        // Simplify "not" to "!" (#345)
        TEST_CASE(not1);

@ -1811,7 +1813,6 @@ private:
        tokenizer.tokenize(istr, "test.cpp");

        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
    }

    void combine_ustrings() {
@ -1824,7 +1825,6 @@ private:
        tokenizer.tokenize(istr, "test.cpp");

        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
    }

    void combine_Ustrings() {
@ -1837,7 +1837,6 @@ private:
        tokenizer.tokenize(istr, "test.cpp");

        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
    }

    void combine_u8strings() {
@ -1845,13 +1844,23 @@ private:

        const char expected[] =  "abcd = u8\"abcd\" ;";

+        Tokenizer tokenizer(&settings0, this);
+        std::istringstream istr(code);
+        tokenizer.tokenize(istr, "test.cpp");
+
+        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
+    }
+
+    void combine_mixedstrings() {
+        const char code[] = "abcdef = \"ab\" L\"cd\" \"ef\";";
+
+        const char expected[] =  "abcdef = L\"abcdef\" ;";

        Tokenizer tokenizer(&settings0, this);
        std::istringstream istr(code);
        tokenizer.tokenize(istr, "test.cpp");

        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
    }

    void double_plus() {
--- a/test/testtoken.cpp
+++ b/test/testtoken.cpp
@ -62,6 +62,7 @@ private:
        TEST_CASE(getStrSize);
        TEST_CASE(getCharAt);
        TEST_CASE(strValue);
+        TEST_CASE(concatStr);

        TEST_CASE(deleteLast);
        TEST_CASE(deleteFirst);
@ -92,7 +93,6 @@ private:
        TEST_CASE(operators);

        TEST_CASE(updateProperties)
-        TEST_CASE(updatePropertiesConcatStr)
        TEST_CASE(isNameGuarantees1)
        TEST_CASE(isNameGuarantees2)
        TEST_CASE(isNameGuarantees3)
@ -462,6 +462,44 @@ private:
        ASSERT_EQUALS("a", tok.strValue());
    }

+    void concatStr() const {
+        Token tok;
+
+        tok.str("\"\"");
+        tok.concatStr("\"\"");
+        ASSERT_EQUALS("", tok.strValue());
+        ASSERT(tok.isCChar());
+
+        tok.str("\"ab\"");
+        tok.concatStr("\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isCChar());
+
+        tok.str("L\"ab\"");
+        tok.concatStr("L\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("L\"ab\"");
+        tok.concatStr("\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("\"ab\"");
+        tok.concatStr("L\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("\"ab\"");
+        tok.concatStr("L\"\"");
+        ASSERT_EQUALS("ab", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("\"ab\"");
+        tok.concatStr("u8\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isUtf8());
+    }

    void deleteLast() const {
        TokensFrontBack listEnds{ nullptr };
@ -977,18 +1015,6 @@ private:
        ASSERT_EQUALS(true, tok.isNumber());
    }

-    void updatePropertiesConcatStr() const {
-        Token tok;
-        tok.str("true");
-
-        ASSERT_EQUALS(true, tok.isBoolean());
-
-        tok.concatStr("123");
-
-        ASSERT_EQUALS(false, tok.isBoolean());
-        ASSERT_EQUALS("tru\"", tok.str());
-    }
-
    void isNameGuarantees1() const {
        Token tok;
        tok.str("Name");