Handle concatenated string and char literals
This handles string and character literals that simplecpp now delivers with their encoding prefix attached. Previously, L'c' would be preprocessed to the two tokens "L" and "'c'"; cppcheck would then remove the "L" token and mark "'c'" as a wide character literal. Now that the prefix is part of the literal token, cppcheck needs to strip the prefix from that token instead. While doing this, add handling of UTF-32 encoded literals (U) and UTF-8 encoded literals (u8).
This commit is contained in:
parent
93194f47a1
commit
6a3dd9a185
|
@ -34,6 +34,19 @@
|
|||
#include <stack>
|
||||
#include <utility>
|
||||
|
||||
/**
 * Check whether a token string is a complete string or character literal,
 * optionally preceded by an encoding prefix.
 *
 * @param str token text to classify
 * @param q   quote character that delimits the literal ('"' or '\'')
 * @return true if str has the form <prefix><q>...<q>, where <prefix> is one
 *         of "", "u8", "L", "U" or "u"; false otherwise
 */
static bool isStringCharLiteral(const std::string &str, char q)
{
    // Built once instead of on every call.
    static const std::string prefixes[] = { "", "u8", "L", "U", "u" };

    // The closing quote is independent of the prefix, so test it first.
    if (str.empty() || str.back() != q)
        return false;

    for (const std::string &p : prefixes) {
        // The token must hold the prefix plus a separate opening and closing
        // quote; otherwise a lone quote (or e.g. u8") would be accepted.
        if (str.length() < p.length() + 2)
            continue;
        // Anchored comparison: no temporary string, no scan past the prefix.
        if (str[p.length()] == q && str.compare(0, p.length(), p) == 0)
            return true;
    }
    return false;
}
|
||||
const std::list<ValueFlow::Value> TokenImpl::mEmptyValueList;
|
||||
|
||||
Token::Token(TokensFrontBack *tokensFrontBack) :
|
||||
|
@ -73,6 +86,10 @@ void Token::update_property_info()
|
|||
if (!mStr.empty()) {
|
||||
if (mStr == "true" || mStr == "false")
|
||||
tokType(eBoolean);
|
||||
else if (isStringCharLiteral(mStr, '\"'))
|
||||
tokType(eString);
|
||||
else if (isStringCharLiteral(mStr, '\''))
|
||||
tokType(eChar);
|
||||
else if (std::isalpha((unsigned char)mStr[0]) || mStr[0] == '_' || mStr[0] == '$') { // Name
|
||||
if (mImpl->mVarId)
|
||||
tokType(eVariable);
|
||||
|
@ -80,10 +97,6 @@ void Token::update_property_info()
|
|||
tokType(eName);
|
||||
} else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1])))
|
||||
tokType(eNumber);
|
||||
else if (mStr.length() > 1 && mStr[0] == '"' && endsWith(mStr,'"'))
|
||||
tokType(eString);
|
||||
else if (mStr.length() > 1 && mStr[0] == '\'' && endsWith(mStr,'\''))
|
||||
tokType(eChar);
|
||||
else if (mStr == "=" || mStr == "<<=" || mStr == ">>=" ||
|
||||
(mStr.size() == 2U && mStr[1] == '=' && std::strchr("+-*/%&^|", mStr[0])))
|
||||
tokType(eAssignmentOp);
|
||||
|
|
|
@ -1926,13 +1926,15 @@ void Tokenizer::combineOperators()
|
|||
|
||||
void Tokenizer::combineStringAndCharLiterals()
|
||||
{
|
||||
// Combine wide strings and wide characters
|
||||
for (Token *tok = list.front(); tok; tok = tok->next()) {
|
||||
if (Token::Match(tok, "[Lu] %char%|%str%")) {
|
||||
// Combine 'L "string"' and 'L 'c''
|
||||
tok->str(tok->next()->str());
|
||||
tok->deleteNext();
|
||||
tok->isLong(true);
|
||||
const std::string prefix[4] = {"u8", "L", "U", "u"};
|
||||
for (const std::string & p : prefix) {
|
||||
if (((tok->tokType() == Token::eString) && (tok->str().find(p + "\"") == 0)) ||
|
||||
((tok->tokType() == Token::eChar) && (tok->str().find(p + "\'") == 0))) {
|
||||
tok->str(tok->str().substr(p.size()));
|
||||
tok->isLong(p != "u8");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -115,6 +115,8 @@ private:
|
|||
|
||||
TEST_CASE(combine_wstrings);
|
||||
TEST_CASE(combine_ustrings);
|
||||
TEST_CASE(combine_Ustrings);
|
||||
TEST_CASE(combine_u8strings);
|
||||
|
||||
// Simplify "not" to "!" (#345)
|
||||
TEST_CASE(not1);
|
||||
|
@ -507,9 +509,9 @@ private:
|
|||
}
|
||||
|
||||
void combine_ustrings() {
|
||||
const char code[] = "abc = u\"abc\";";
|
||||
const char code[] = "abcd = u\"ab\" u\"cd\";";
|
||||
|
||||
const char expected[] = "abc = \"abc\" ;";
|
||||
const char expected[] = "abcd = \"abcd\" ;";
|
||||
|
||||
Tokenizer tokenizer(&settings0, this);
|
||||
std::istringstream istr(code);
|
||||
|
@ -519,6 +521,33 @@ private:
|
|||
ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
|
||||
}
|
||||
|
||||
void combine_Ustrings() {
    // Two adjacent U"..." literals must come out as one unprefixed string
    // token that is still flagged as long.
    const char source[] = "abcd = U\"ab\" U\"cd\";";
    const char wanted[] = "abcd = \"abcd\" ;";

    std::istringstream input(source);
    Tokenizer tokenizer(&settings0, this);
    tokenizer.tokenize(input, "test.cpp");

    ASSERT_EQUALS(wanted, tokenizer.tokens()->stringifyList(0, false));
    // Token index 2 is the combined literal ("abcd", "=", literal).
    ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
}
|
||||
|
||||
void combine_u8strings() {
    // Two adjacent u8"..." literals must come out as one unprefixed string
    // token; unlike L/u/U literals it must NOT be flagged as long.
    const char source[] = "abcd = u8\"ab\" u8\"cd\";";
    const char wanted[] = "abcd = \"abcd\" ;";

    std::istringstream input(source);
    Tokenizer tokenizer(&settings0, this);
    tokenizer.tokenize(input, "test.cpp");

    ASSERT_EQUALS(wanted, tokenizer.tokens()->stringifyList(0, false));
    // Token index 2 is the combined literal ("abcd", "=", literal).
    ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
}
|
||||
|
||||
void double_plus() {
|
||||
{
|
||||
const char code1[] = "void foo( int a )\n"
|
||||
|
|
|
@ -8210,6 +8210,7 @@ private:
|
|||
if (!tokenList.list.createTokens(istr,"test.cpp"))
|
||||
return "ERROR";
|
||||
|
||||
tokenList.combineStringAndCharLiterals();
|
||||
tokenList.combineOperators();
|
||||
tokenList.createLinks();
|
||||
tokenList.createLinks2();
|
||||
|
@ -8295,9 +8296,11 @@ private:
|
|||
|
||||
ASSERT_EQUALS("a\"\"=", testAst("a=\"\""));
|
||||
ASSERT_EQUALS("a\'\'=", testAst("a=\'\'"));
|
||||
|
||||
ASSERT_EQUALS("'X''a'>", testAst("('X' > 'a')"));
|
||||
ASSERT_EQUALS("'X''a'>", testAst("(L'X' > L'a')"));
|
||||
ASSERT_EQUALS("'X''a'>", testAst("(u'X' > u'a')"));
|
||||
ASSERT_EQUALS("'X''a'>", testAst("(U'X' > U'a')"));
|
||||
ASSERT_EQUALS("'X''a'>", testAst("(u8'X' > u8'a')"));
|
||||
|
||||
ASSERT_EQUALS("a0>bc/d:?", testAst("(a>0) ? (b/(c)) : d;"));
|
||||
ASSERT_EQUALS("abc/+d+", testAst("a + (b/(c)) + d;"));
|
||||
|
|
Loading…
Reference in New Issue