Token::ConcatStr(): Handle mixed string literals (#2337)

Improve handling of adjacent string literals of different types.

Example of adjacent string literals: "ab" L"cd".

In C89, C++98 and C++03, this is undefined. As of C99 and C++11, this is
well defined and the two string literals are concatenated to L"abcd".
C11 and C++11 introduce the utf16, utf32 and (C++ only) utf8 string
types. Concatenating any of these with a regular c-string works exactly
as the wide string example above. The result of having two adjacent
string literals with different prefix is implementation defined, unless
one is a UTF-8 string literal and the other is a wide string literal.
In this case the behaviour is undefined.

Ignore the undefined and ill-formed programs (this behaviour is unchanged)
and make sure that concatenating a plain c string literal with a prefixed
one works correctly (in C99 and C++11 and later versions). It also makes the
behaviour consistent since previously, "ab" L"cd" would result in "abcd"
while L"ab" "cd" would result in L"abcd".

It also means the somewhat awkward updatePropertiesConcatStr() test can
be removed since the added tests would not work if update_properties()
was not called in concatStr().

Since the prefix is stored in the token, testing the type of the string
is not relevant in TestSimplifyTokens. It is tested extensively in
TestToken::stringTypes().
This commit is contained in:
Rikard Falkeborn 2019-11-08 08:03:45 +01:00 committed by Daniel Marjamäki
parent 650408a210
commit 1fcbd696be
3 changed files with 60 additions and 22 deletions

View File

@ -175,6 +175,9 @@ void Token::concatStr(std::string const& b)
mStr.erase(mStr.length() - 1);
mStr.append(getStringLiteral(b) + "\"");
if (isCChar() && isStringLiteral(b) && b[0] != '"') {
mStr.insert(0, b.substr(0, b.find('"')));
}
update_property_info();
}

View File

@ -90,7 +90,14 @@ private:
TEST_CASE(cast);
TEST_CASE(iftruefalse);
TEST_CASE(combine_strings);
TEST_CASE(combine_wstrings);
TEST_CASE(combine_ustrings);
TEST_CASE(combine_Ustrings);
TEST_CASE(combine_u8strings);
TEST_CASE(combine_mixedstrings);
TEST_CASE(double_plus);
TEST_CASE(redundant_plus);
TEST_CASE(redundant_plus_numbers);
@ -143,11 +150,6 @@ private:
TEST_CASE(doWhileAssign); // varid
TEST_CASE(test_4881); // similar to doWhileAssign (#4911), taken from #4881 with full code
TEST_CASE(combine_wstrings);
TEST_CASE(combine_ustrings);
TEST_CASE(combine_Ustrings);
TEST_CASE(combine_u8strings);
// Simplify "not" to "!" (#345)
TEST_CASE(not1);
@ -1811,7 +1813,6 @@ private:
tokenizer.tokenize(istr, "test.cpp");
ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
}
void combine_ustrings() {
@ -1824,7 +1825,6 @@ private:
tokenizer.tokenize(istr, "test.cpp");
ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
}
void combine_Ustrings() {
@ -1837,7 +1837,6 @@ private:
tokenizer.tokenize(istr, "test.cpp");
ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
}
void combine_u8strings() {
@ -1845,13 +1844,23 @@ private:
const char expected[] = "abcd = u8\"abcd\" ;";
Tokenizer tokenizer(&settings0, this);
std::istringstream istr(code);
tokenizer.tokenize(istr, "test.cpp");
ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
}
// Tokenizes an assignment whose right-hand side is three adjacent string
// literals with mixed prefixes ("ab" L"cd" "ef") and checks that the token
// list stringifies to a single wide literal L"abcdef".
void combine_mixedstrings() {
const char code[] = "abcdef = \"ab\" L\"cd\" \"ef\";";
const char expected[] = "abcdef = L\"abcdef\" ;";
Tokenizer tokenizer(&settings0, this);
std::istringstream istr(code);
tokenizer.tokenize(istr, "test.cpp");
ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
// NOTE(review): this assertion expects isLong() == false on the token that
// is expected above to be L"abcdef"; it is presumably a deleted line of the
// diff shown without its '-' marker -- confirm against the real file.
ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
}
void double_plus() {

View File

@ -62,6 +62,7 @@ private:
TEST_CASE(getStrSize);
TEST_CASE(getCharAt);
TEST_CASE(strValue);
TEST_CASE(concatStr);
TEST_CASE(deleteLast);
TEST_CASE(deleteFirst);
@ -92,7 +93,6 @@ private:
TEST_CASE(operators);
TEST_CASE(updateProperties)
TEST_CASE(updatePropertiesConcatStr)
TEST_CASE(isNameGuarantees1)
TEST_CASE(isNameGuarantees2)
TEST_CASE(isNameGuarantees3)
@ -462,6 +462,44 @@ private:
ASSERT_EQUALS("a", tok.strValue());
}
// Exercises Token::concatStr() on a single reused token: each case resets the
// token with str(), appends a second literal, then checks the combined
// strValue() (asserted without prefix or quotes) and the resulting string type.
void concatStr() const {
Token tok;
// Two empty plain literals: result is empty and still a plain C string.
tok.str("\"\"");
tok.concatStr("\"\"");
ASSERT_EQUALS("", tok.strValue());
ASSERT(tok.isCChar());
// plain + plain -> plain
tok.str("\"ab\"");
tok.concatStr("\"cd\"");
ASSERT_EQUALS("abcd", tok.strValue());
ASSERT(tok.isCChar());
// wide + wide -> wide
tok.str("L\"ab\"");
tok.concatStr("L\"cd\"");
ASSERT_EQUALS("abcd", tok.strValue());
ASSERT(tok.isLong());
// wide + plain -> wide (prefix of the left operand is kept)
tok.str("L\"ab\"");
tok.concatStr("\"cd\"");
ASSERT_EQUALS("abcd", tok.strValue());
ASSERT(tok.isLong());
// plain + wide -> wide (prefix is taken from the right operand)
tok.str("\"ab\"");
tok.concatStr("L\"cd\"");
ASSERT_EQUALS("abcd", tok.strValue());
ASSERT(tok.isLong());
// plain + empty wide -> wide, even though no characters are appended
tok.str("\"ab\"");
tok.concatStr("L\"\"");
ASSERT_EQUALS("ab", tok.strValue());
ASSERT(tok.isLong());
// plain + u8 -> UTF-8 (covers a multi-character prefix)
tok.str("\"ab\"");
tok.concatStr("u8\"cd\"");
ASSERT_EQUALS("abcd", tok.strValue());
ASSERT(tok.isUtf8());
}
void deleteLast() const {
TokensFrontBack listEnds{ nullptr };
@ -977,18 +1015,6 @@ private:
ASSERT_EQUALS(true, tok.isNumber());
}
// Checks that the token's boolean classification is refreshed after
// concatStr(): a token holding "true" is boolean before the call and is no
// longer boolean afterwards.
void updatePropertiesConcatStr() const {
Token tok;
tok.str("true");
ASSERT_EQUALS(true, tok.isBoolean());
// Neither operand is a quoted string literal here; the call is only used to
// change the token text.
tok.concatStr("123");
ASSERT_EQUALS(false, tok.isBoolean());
// The resulting text is "tru" followed by a double quote.
ASSERT_EQUALS("tru\"", tok.str());
}
void isNameGuarantees1() const {
Token tok;
tok.str("Name");