Handle concatenated string and char literals

This handles concatenated string and character literals coming from simplecpp.
Previously, L'c' would be preprocessed to the two tokens "L" and "'c'".
cppcheck would then remove the "L" token and mark "'c'" as a wide
character literal. Now the prefix is part of the literal token itself, so
cppcheck needs to strip the prefix from the token instead.

When doing this, also add handling of UTF-32 encoded literals (U) and UTF-8
encoded literals (u8).
This commit is contained in:
Rikard Falkeborn 2019-03-10 10:38:50 +01:00 committed by Daniel Marjamäki
parent 93194f47a1
commit 6a3dd9a185
4 changed files with 60 additions and 13 deletions

View File

@ -34,6 +34,19 @@
#include <stack>
#include <utility>
// Check whether a token string is a string or character literal delimited by
// the quote character q, optionally carrying one of the encoding prefixes
// u8, L, U or u (for example "abc", L'c' or u8"abc").
//
// str: token text to classify
// q:   delimiter to look for, '"' for string literals or '\'' for char literals
// Returns true only if str has the form <prefix><q>...<q>, i.e. it contains
// both an opening and a closing quote.
static bool isStringCharLiteral(const std::string &str, char q)
{
    // The shortest possible literal is an empty one: opening + closing quote.
    if (str.size() < 2 || str.back() != q)
        return false;

    static const std::string prefixes[] = { "", "u8", "L", "U", "u" };
    for (const std::string &p : prefixes) {
        const std::string::size_type quotePos = p.size();
        // Require room for prefix + opening quote + closing quote, so that a
        // lone quote (or prefix + one quote) is not mistaken for a literal
        // by counting the same character as both delimiters.
        if (str.size() < quotePos + 2)
            continue;
        // Anchored comparison: no temporary string and no scan of the body.
        if (str[quotePos] == q && str.compare(0, quotePos, p) == 0)
            return true;
    }
    return false;
}
const std::list<ValueFlow::Value> TokenImpl::mEmptyValueList;
Token::Token(TokensFrontBack *tokensFrontBack) :
@ -73,6 +86,10 @@ void Token::update_property_info()
if (!mStr.empty()) {
if (mStr == "true" || mStr == "false")
tokType(eBoolean);
else if (isStringCharLiteral(mStr, '\"'))
tokType(eString);
else if (isStringCharLiteral(mStr, '\''))
tokType(eChar);
else if (std::isalpha((unsigned char)mStr[0]) || mStr[0] == '_' || mStr[0] == '$') { // Name
if (mImpl->mVarId)
tokType(eVariable);
@ -80,10 +97,6 @@ void Token::update_property_info()
tokType(eName);
} else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1])))
tokType(eNumber);
else if (mStr.length() > 1 && mStr[0] == '"' && endsWith(mStr,'"'))
tokType(eString);
else if (mStr.length() > 1 && mStr[0] == '\'' && endsWith(mStr,'\''))
tokType(eChar);
else if (mStr == "=" || mStr == "<<=" || mStr == ">>=" ||
(mStr.size() == 2U && mStr[1] == '=' && std::strchr("+-*/%&^|", mStr[0])))
tokType(eAssignmentOp);

View File

@ -1926,13 +1926,15 @@ void Tokenizer::combineOperators()
void Tokenizer::combineStringAndCharLiterals()
{
// Combine wide strings and wide characters
for (Token *tok = list.front(); tok; tok = tok->next()) {
if (Token::Match(tok, "[Lu] %char%|%str%")) {
// Combine 'L "string"' and 'L 'c''
tok->str(tok->next()->str());
tok->deleteNext();
tok->isLong(true);
const std::string prefix[4] = {"u8", "L", "U", "u"};
for (const std::string & p : prefix) {
if (((tok->tokType() == Token::eString) && (tok->str().find(p + "\"") == 0)) ||
((tok->tokType() == Token::eChar) && (tok->str().find(p + "\'") == 0))) {
tok->str(tok->str().substr(p.size()));
tok->isLong(p != "u8");
break;
}
}
}

View File

@ -115,6 +115,8 @@ private:
TEST_CASE(combine_wstrings);
TEST_CASE(combine_ustrings);
TEST_CASE(combine_Ustrings);
TEST_CASE(combine_u8strings);
// Simplify "not" to "!" (#345)
TEST_CASE(not1);
@ -507,9 +509,9 @@ private:
}
void combine_ustrings() {
const char code[] = "abc = u\"abc\";";
const char code[] = "abcd = u\"ab\" u\"cd\";";
const char expected[] = "abc = \"abc\" ;";
const char expected[] = "abcd = \"abcd\" ;";
Tokenizer tokenizer(&settings0, this);
std::istringstream istr(code);
@ -519,6 +521,33 @@ private:
ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
}
void combine_Ustrings() {
    // Two adjacent U"..." literals are expected to come out as a single
    // string token without the prefix, flagged as long.
    const char expected[] = "abcd = \"abcd\" ;";
    const char code[] = "abcd = U\"ab\" U\"cd\";";

    std::istringstream input(code);
    Tokenizer tokenizer(&settings0, this);
    tokenizer.tokenize(input, "test.cpp");

    // Third token (index 2) is the merged literal
    ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(0, false));
    ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
}
void combine_u8strings() {
    // Two adjacent u8"..." literals are expected to come out as a single
    // string token without the prefix; unlike L/u/U it is not flagged as long.
    const char expected[] = "abcd = \"abcd\" ;";
    const char code[] = "abcd = u8\"ab\" u8\"cd\";";

    std::istringstream input(code);
    Tokenizer tokenizer(&settings0, this);
    tokenizer.tokenize(input, "test.cpp");

    // Third token (index 2) is the merged literal
    ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(0, false));
    ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
}
void double_plus() {
{
const char code1[] = "void foo( int a )\n"

View File

@ -8210,6 +8210,7 @@ private:
if (!tokenList.list.createTokens(istr,"test.cpp"))
return "ERROR";
tokenList.combineStringAndCharLiterals();
tokenList.combineOperators();
tokenList.createLinks();
tokenList.createLinks2();
@ -8295,9 +8296,11 @@ private:
ASSERT_EQUALS("a\"\"=", testAst("a=\"\""));
ASSERT_EQUALS("a\'\'=", testAst("a=\'\'"));
ASSERT_EQUALS("'X''a'>", testAst("('X' > 'a')"));
ASSERT_EQUALS("'X''a'>", testAst("(L'X' > L'a')"));
ASSERT_EQUALS("'X''a'>", testAst("(u'X' > u'a')"));
ASSERT_EQUALS("'X''a'>", testAst("(U'X' > U'a')"));
ASSERT_EQUALS("'X''a'>", testAst("(u8'X' > u8'a')"));
ASSERT_EQUALS("a0>bc/d:?", testAst("(a>0) ? (b/(c)) : d;"));
ASSERT_EQUALS("abc/+d+", testAst("a + (b/(c)) + d;"));