From 1fcbd696be746f865e4a9917f731db0a349285d3 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Fri, 8 Nov 2019 08:03:45 +0100
Subject: [PATCH] Token::ConcatStr(): Handle mixed string literals (#2337)

Improve handling of adjacent string literals of different types.

Example of adjacent string literals: "ab" L"cd".

In C89, C++98 and C++03, this is undefined. As of C99 and C++11, this is
well defined and the two string literals are concatenated to L"abcd".
C11 and C++11 introduce the utf16, utf32 and (C++ only) utf8 string
types. Concatenating any of these with a regular c-string works exactly
as the wide string example above. The result of having two adjacent
string literals with different prefixes is implementation defined, unless
one is a UTF-8 string literal and the other is a wide string literal.
In this case the behaviour is undefined.

Ignore the undefined and ill-formed programs (this behaviour is unchanged)
and make sure that concatenating a plain c string literal with a prefixed
one works correctly (in C99 and C++11 and later versions). It also makes the
behaviour consistent since previously, "ab" L"cd" would result in "abcd"
while L"ab" "cd" would result in L"abcd".

It also means the somewhat awkward updatePropertiesConcatStr() test can
be removed since the added tests would not work if update_properties()
was not called in concatStr().

Since the prefix is stored in the token, testing the type of the string
is not relevant in TestSimplifyTokens. It is tested extensively in
TestToken::stringTypes().
---
 lib/token.cpp               |  3 +++
 test/testsimplifytokens.cpp | 27 ++++++++++++-------
 test/testtoken.cpp          | 52 +++++++++++++++++++++++++++----------
 3 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/lib/token.cpp b/lib/token.cpp
index f92742d61..89d9bf0c5 100644
--- a/lib/token.cpp
+++ b/lib/token.cpp
@@ -175,6 +175,9 @@ void Token::concatStr(std::string const& b)
     mStr.erase(mStr.length() - 1);
     mStr.append(getStringLiteral(b) + "\"");
 
+    if (isCChar() && isStringLiteral(b) && b[0] != '"') {
+        mStr.insert(0, b.substr(0, b.find('"')));
+    }
     update_property_info();
 }
 
diff --git a/test/testsimplifytokens.cpp b/test/testsimplifytokens.cpp
index 2cf3747e7..4327081f3 100644
--- a/test/testsimplifytokens.cpp
+++ b/test/testsimplifytokens.cpp
@@ -90,7 +90,14 @@ private:
 
         TEST_CASE(cast);
         TEST_CASE(iftruefalse);
+
         TEST_CASE(combine_strings);
+        TEST_CASE(combine_wstrings);
+        TEST_CASE(combine_ustrings);
+        TEST_CASE(combine_Ustrings);
+        TEST_CASE(combine_u8strings);
+        TEST_CASE(combine_mixedstrings);
+
         TEST_CASE(double_plus);
         TEST_CASE(redundant_plus);
         TEST_CASE(redundant_plus_numbers);
@@ -143,11 +150,6 @@ private:
         TEST_CASE(doWhileAssign); // varid
         TEST_CASE(test_4881); // similar to doWhileAssign (#4911), taken from #4881 with full code
 
-        TEST_CASE(combine_wstrings);
-        TEST_CASE(combine_ustrings);
-        TEST_CASE(combine_Ustrings);
-        TEST_CASE(combine_u8strings);
-
         // Simplify "not" to "!" (#345)
         TEST_CASE(not1);
 
@@ -1811,7 +1813,6 @@ private:
         tokenizer.tokenize(istr, "test.cpp");
 
         ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
     }
 
     void combine_ustrings() {
@@ -1824,7 +1825,6 @@ private:
         tokenizer.tokenize(istr, "test.cpp");
 
         ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
     }
 
     void combine_Ustrings() {
@@ -1837,7 +1837,6 @@ private:
         tokenizer.tokenize(istr, "test.cpp");
 
         ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
     }
 
     void combine_u8strings() {
@@ -1845,13 +1844,23 @@ private:
 
         const char expected[] =  "abcd = u8\"abcd\" ;";
 
+        Tokenizer tokenizer(&settings0, this);
+        std::istringstream istr(code);
+        tokenizer.tokenize(istr, "test.cpp");
+
+        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
+    }
+
+    void combine_mixedstrings() {
+        const char code[] = "abcdef = \"ab\" L\"cd\" \"ef\";";
+
+        const char expected[] =  "abcdef = L\"abcdef\" ;";
 
         Tokenizer tokenizer(&settings0, this);
         std::istringstream istr(code);
         tokenizer.tokenize(istr, "test.cpp");
 
         ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
     }
 
     void double_plus() {
diff --git a/test/testtoken.cpp b/test/testtoken.cpp
index e2ef5a75f..37c0768c3 100644
--- a/test/testtoken.cpp
+++ b/test/testtoken.cpp
@@ -62,6 +62,7 @@ private:
         TEST_CASE(getStrSize);
         TEST_CASE(getCharAt);
         TEST_CASE(strValue);
+        TEST_CASE(concatStr);
 
         TEST_CASE(deleteLast);
         TEST_CASE(deleteFirst);
@@ -92,7 +93,6 @@ private:
         TEST_CASE(operators);
 
         TEST_CASE(updateProperties)
-        TEST_CASE(updatePropertiesConcatStr)
         TEST_CASE(isNameGuarantees1)
         TEST_CASE(isNameGuarantees2)
         TEST_CASE(isNameGuarantees3)
@@ -462,6 +462,44 @@ private:
         ASSERT_EQUALS("a", tok.strValue());
     }
 
+    void concatStr() const {
+        Token tok;
+
+        tok.str("\"\"");
+        tok.concatStr("\"\"");
+        ASSERT_EQUALS("", tok.strValue());
+        ASSERT(tok.isCChar());
+
+        tok.str("\"ab\"");
+        tok.concatStr("\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isCChar());
+
+        tok.str("L\"ab\"");
+        tok.concatStr("L\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("L\"ab\"");
+        tok.concatStr("\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("\"ab\"");
+        tok.concatStr("L\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("\"ab\"");
+        tok.concatStr("L\"\"");
+        ASSERT_EQUALS("ab", tok.strValue());
+        ASSERT(tok.isLong());
+
+        tok.str("\"ab\"");
+        tok.concatStr("u8\"cd\"");
+        ASSERT_EQUALS("abcd", tok.strValue());
+        ASSERT(tok.isUtf8());
+    }
 
     void deleteLast() const {
         TokensFrontBack listEnds{ nullptr };
@@ -977,18 +1015,6 @@ private:
         ASSERT_EQUALS(true, tok.isNumber());
     }
 
-    void updatePropertiesConcatStr() const {
-        Token tok;
-        tok.str("true");
-
-        ASSERT_EQUALS(true, tok.isBoolean());
-
-        tok.concatStr("123");
-
-        ASSERT_EQUALS(false, tok.isBoolean());
-        ASSERT_EQUALS("tru\"", tok.str());
-    }
-
     void isNameGuarantees1() const {
         Token tok;
         tok.str("Name");