From 1fcbd696be746f865e4a9917f731db0a349285d3 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Fri, 8 Nov 2019 08:03:45 +0100 Subject: [PATCH] Token::ConcatStr(): Handle mixed string literals (#2337) Improve handling of adjacent string literals of different types. Example of adjacent string literals: "ab" L"cd". In C89, C++98 and C++03, this is undefined. As of C99 and C++11, this is well defined and the two string literals are concatenated to L"abcd". C11 and C++11 introduce the utf16, utf32 and (C++ only) utf8 string types. Concatenating any of these with a regular c-string works exactly as the wide string example above. The result of having two adjacent string literals with different prefixes is implementation defined, unless one is a UTF-8 string literal and the other is a wide string literal. In this case the behaviour is undefined. Ignore the undefined and ill-formed programs (this behaviour is unchanged) and make sure that concatenating a plain c string literal with a prefixed one works correctly (in C99 and C++11 and later versions). It also makes the behaviour consistent since previously, "ab" L"cd" would result in "abcd" while L"ab" "cd" would result in L"abcd". It also means the somewhat awkward updatePropertiesConcatStr() test can be removed since the added tests would not work if update_property_info() was not called in concatStr(). Since the prefix is stored in the token, testing the type of the string is not relevant in TestSimplifyTokens. It is tested extensively in TestToken::stringTypes(). 
--- lib/token.cpp | 3 +++ test/testsimplifytokens.cpp | 27 ++++++++++++------- test/testtoken.cpp | 52 +++++++++++++++++++++++++++---------- 3 files changed, 60 insertions(+), 22 deletions(-) diff --git a/lib/token.cpp b/lib/token.cpp index f92742d61..89d9bf0c5 100644 --- a/lib/token.cpp +++ b/lib/token.cpp @@ -175,6 +175,9 @@ void Token::concatStr(std::string const& b) mStr.erase(mStr.length() - 1); mStr.append(getStringLiteral(b) + "\""); + if (isCChar() && isStringLiteral(b) && b[0] != '"') { + mStr.insert(0, b.substr(0, b.find('"'))); + } update_property_info(); } diff --git a/test/testsimplifytokens.cpp b/test/testsimplifytokens.cpp index 2cf3747e7..4327081f3 100644 --- a/test/testsimplifytokens.cpp +++ b/test/testsimplifytokens.cpp @@ -90,7 +90,14 @@ private: TEST_CASE(cast); TEST_CASE(iftruefalse); + TEST_CASE(combine_strings); + TEST_CASE(combine_wstrings); + TEST_CASE(combine_ustrings); + TEST_CASE(combine_Ustrings); + TEST_CASE(combine_u8strings); + TEST_CASE(combine_mixedstrings); + TEST_CASE(double_plus); TEST_CASE(redundant_plus); TEST_CASE(redundant_plus_numbers); @@ -143,11 +150,6 @@ private: TEST_CASE(doWhileAssign); // varid TEST_CASE(test_4881); // similar to doWhileAssign (#4911), taken from #4881 with full code - TEST_CASE(combine_wstrings); - TEST_CASE(combine_ustrings); - TEST_CASE(combine_Ustrings); - TEST_CASE(combine_u8strings); - // Simplify "not" to "!" 
(#345) TEST_CASE(not1); @@ -1811,7 +1813,6 @@ private: tokenizer.tokenize(istr, "test.cpp"); ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false)); - ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong()); } void combine_ustrings() { @@ -1824,7 +1825,6 @@ private: tokenizer.tokenize(istr, "test.cpp"); ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false)); - ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong()); } void combine_Ustrings() { @@ -1837,7 +1837,6 @@ private: tokenizer.tokenize(istr, "test.cpp"); ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false)); - ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong()); } void combine_u8strings() { @@ -1845,13 +1844,23 @@ private: const char expected[] = "abcd = u8\"abcd\" ;"; + Tokenizer tokenizer(&settings0, this); + std::istringstream istr(code); + tokenizer.tokenize(istr, "test.cpp"); + + ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false)); + } + + void combine_mixedstrings() { + const char code[] = "abcdef = \"ab\" L\"cd\" \"ef\";"; + + const char expected[] = "abcdef = L\"abcdef\" ;"; Tokenizer tokenizer(&settings0, this); std::istringstream istr(code); tokenizer.tokenize(istr, "test.cpp"); ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false)); - ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong()); } void double_plus() { diff --git a/test/testtoken.cpp b/test/testtoken.cpp index e2ef5a75f..37c0768c3 100644 --- a/test/testtoken.cpp +++ b/test/testtoken.cpp @@ -62,6 +62,7 @@ private: TEST_CASE(getStrSize); TEST_CASE(getCharAt); TEST_CASE(strValue); + TEST_CASE(concatStr); TEST_CASE(deleteLast); TEST_CASE(deleteFirst); @@ -92,7 +93,6 @@ private: TEST_CASE(operators); TEST_CASE(updateProperties) - TEST_CASE(updatePropertiesConcatStr) TEST_CASE(isNameGuarantees1) TEST_CASE(isNameGuarantees2) TEST_CASE(isNameGuarantees3) @@ -462,6 +462,44 @@ private: ASSERT_EQUALS("a", tok.strValue()); 
} + void concatStr() const { + Token tok; + + tok.str("\"\""); + tok.concatStr("\"\""); + ASSERT_EQUALS("", tok.strValue()); + ASSERT(tok.isCChar()); + + tok.str("\"ab\""); + tok.concatStr("\"cd\""); + ASSERT_EQUALS("abcd", tok.strValue()); + ASSERT(tok.isCChar()); + + tok.str("L\"ab\""); + tok.concatStr("L\"cd\""); + ASSERT_EQUALS("abcd", tok.strValue()); + ASSERT(tok.isLong()); + + tok.str("L\"ab\""); + tok.concatStr("\"cd\""); + ASSERT_EQUALS("abcd", tok.strValue()); + ASSERT(tok.isLong()); + + tok.str("\"ab\""); + tok.concatStr("L\"cd\""); + ASSERT_EQUALS("abcd", tok.strValue()); + ASSERT(tok.isLong()); + + tok.str("\"ab\""); + tok.concatStr("L\"\""); + ASSERT_EQUALS("ab", tok.strValue()); + ASSERT(tok.isLong()); + + tok.str("\"ab\""); + tok.concatStr("u8\"cd\""); + ASSERT_EQUALS("abcd", tok.strValue()); + ASSERT(tok.isUtf8()); + } void deleteLast() const { TokensFrontBack listEnds{ nullptr }; @@ -977,18 +1015,6 @@ private: ASSERT_EQUALS(true, tok.isNumber()); } - void updatePropertiesConcatStr() const { - Token tok; - tok.str("true"); - - ASSERT_EQUALS(true, tok.isBoolean()); - - tok.concatStr("123"); - - ASSERT_EQUALS(false, tok.isBoolean()); - ASSERT_EQUALS("tru\"", tok.str()); - } - void isNameGuarantees1() const { Token tok; tok.str("Name");