From 6a3dd9a18542467c317bc4b670259e12fc343497 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn
Date: Sun, 10 Mar 2019 10:38:50 +0100
Subject: [PATCH] Handle concatenated string and char literals

This handles concatenated string and character literals coming from
simplecpp. Previously, L'c' would be preprocessed into the tokens "L"
and "'c'"; cppcheck would then remove the "L" token and mark "'c'" as a
wide character literal. Now the prefix is part of the literal token, so
cppcheck needs to strip the prefix instead. While doing this, also add
handling of UTF-32 encoded literals (U) and UTF-8 encoded literals (u8).
---
 lib/token.cpp               | 21 +++++++++++++++++----
 lib/tokenize.cpp            | 14 ++++++++------
 test/testsimplifytokens.cpp | 33 +++++++++++++++++++++++++++++++--
 test/testtokenize.cpp       |  5 ++++-
 4 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/lib/token.cpp b/lib/token.cpp
index 12366019f..58d002894 100644
--- a/lib/token.cpp
+++ b/lib/token.cpp
@@ -34,6 +34,19 @@
 #include
 #include
 
+static bool isStringCharLiteral(const std::string &str, char q)
+{
+
+    if (!endsWith(str, q))
+        return false;
+
+    const std::string prefix[5] = { "", "u8", "L", "U", "u" };
+    for (const std::string & p: prefix) {
+        if ((str.length() + 1) > p.length() && (str.find(p + q) == 0))
+            return true;
+    }
+    return false;
+}
 const std::list<ValueFlow::Value> TokenImpl::mEmptyValueList;
 
 Token::Token(TokensFrontBack *tokensFrontBack) :
@@ -73,6 +86,10 @@ void Token::update_property_info()
     if (!mStr.empty()) {
         if (mStr == "true" || mStr == "false")
             tokType(eBoolean);
+        else if (isStringCharLiteral(mStr, '\"'))
+            tokType(eString);
+        else if (isStringCharLiteral(mStr, '\''))
+            tokType(eChar);
         else if (std::isalpha((unsigned char)mStr[0]) || mStr[0] == '_' || mStr[0] == '$') { // Name
             if (mImpl->mVarId)
                 tokType(eVariable);
@@ -80,10 +97,6 @@ void Token::update_property_info()
                 tokType(eName);
         } else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1])))
             tokType(eNumber);
-        else if (mStr.length() > 1 && mStr[0] == '"' && endsWith(mStr,'"'))
-            tokType(eString);
-        else if (mStr.length() > 1 && mStr[0] == '\'' && endsWith(mStr,'\''))
-            tokType(eChar);
         else if (mStr == "=" || mStr == "<<=" || mStr == ">>=" ||
                  (mStr.size() == 2U && mStr[1] == '=' && std::strchr("+-*/%&^|", mStr[0])))
             tokType(eAssignmentOp);
diff --git a/lib/tokenize.cpp b/lib/tokenize.cpp
index b622eab12..0084c28f4 100644
--- a/lib/tokenize.cpp
+++ b/lib/tokenize.cpp
@@ -1926,13 +1926,15 @@ void Tokenizer::combineOperators()
 
 void Tokenizer::combineStringAndCharLiterals()
 {
-    // Combine wide strings and wide characters
     for (Token *tok = list.front(); tok; tok = tok->next()) {
-        if (Token::Match(tok, "[Lu] %char%|%str%")) {
-            // Combine 'L "string"' and 'L 'c''
-            tok->str(tok->next()->str());
-            tok->deleteNext();
-            tok->isLong(true);
+        const std::string prefix[4] = {"u8", "L", "U", "u"};
+        for (const std::string & p : prefix) {
+            if (((tok->tokType() == Token::eString) && (tok->str().find(p + "\"") == 0)) ||
+                ((tok->tokType() == Token::eChar) && (tok->str().find(p + "\'") == 0))) {
+                tok->str(tok->str().substr(p.size()));
+                tok->isLong(p != "u8");
+                break;
+            }
         }
     }
 
diff --git a/test/testsimplifytokens.cpp b/test/testsimplifytokens.cpp
index df36cbf44..24e1cdaf4 100644
--- a/test/testsimplifytokens.cpp
+++ b/test/testsimplifytokens.cpp
@@ -115,6 +115,8 @@ private:
         TEST_CASE(combine_wstrings);
         TEST_CASE(combine_ustrings);
+        TEST_CASE(combine_Ustrings);
+        TEST_CASE(combine_u8strings);
 
         // Simplify "not" to "!" (#345)
         TEST_CASE(not1);
@@ -507,9 +509,9 @@ private:
     }
 
     void combine_ustrings() {
-        const char code[] = "abc = u\"abc\";";
+        const char code[] = "abcd = u\"ab\" u\"cd\";";
 
-        const char expected[] = "abc = \"abc\" ;";
+        const char expected[] = "abcd = \"abcd\" ;";
 
         Tokenizer tokenizer(&settings0, this);
         std::istringstream istr(code);
@@ -519,6 +521,33 @@ private:
         ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
     }
 
+    void combine_Ustrings() {
+        const char code[] = "abcd = U\"ab\" U\"cd\";";
+
+        const char expected[] = "abcd = \"abcd\" ;";
+
+        Tokenizer tokenizer(&settings0, this);
+        std::istringstream istr(code);
+        tokenizer.tokenize(istr, "test.cpp");
+
+        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(0, false));
+        ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
+    }
+
+    void combine_u8strings() {
+        const char code[] = "abcd = u8\"ab\" u8\"cd\";";
+
+        const char expected[] = "abcd = \"abcd\" ;";
+
+
+        Tokenizer tokenizer(&settings0, this);
+        std::istringstream istr(code);
+        tokenizer.tokenize(istr, "test.cpp");
+
+        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(0, false));
+        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
+    }
+
     void double_plus() {
         {
             const char code1[] = "void foo( int a )\n"
diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp
index eb3179417..594948c5b 100644
--- a/test/testtokenize.cpp
+++ b/test/testtokenize.cpp
@@ -8210,6 +8210,7 @@ private:
         if (!tokenList.list.createTokens(istr,"test.cpp"))
             return "ERROR";
 
+        tokenList.combineStringAndCharLiterals();
         tokenList.combineOperators();
         tokenList.createLinks();
         tokenList.createLinks2();
@@ -8295,9 +8296,11 @@ private:
         ASSERT_EQUALS("a\"\"=", testAst("a=\"\""));
         ASSERT_EQUALS("a\'\'=", testAst("a=\'\'"));
 
-        ASSERT_EQUALS("'X''a'>", testAst("('X' > 'a')"));
         ASSERT_EQUALS("'X''a'>", testAst("(L'X' > L'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(u'X' > u'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(U'X' > U'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(u8'X' > u8'a')"));
 
         ASSERT_EQUALS("a0>bc/d:?", testAst("(a>0) ? (b/(c)) : d;"));
         ASSERT_EQUALS("abc/+d+", testAst("a + (b/(c)) + d;"));
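
A note on the transformation itself: the rule the patch applies can be shown
outside of cppcheck's Token machinery. The sketch below is only an illustration
of that rule under assumed names (stripEncodingPrefix, StrippedLiteral and the
main() driver are made up here, not cppcheck API); in the patch, the reworked
combineStringAndCharLiterals() applies the same check directly to each string
and char token.

    // Standalone sketch (assumed names, not cppcheck code) of the
    // prefix-stripping rule: try the known encoding prefixes and, if the
    // literal starts with one followed by a quote, drop the prefix and
    // remember whether the literal is still "long" (every prefix except u8).
    #include <iostream>
    #include <string>

    struct StrippedLiteral {
        std::string text; // literal with the encoding prefix removed
        bool isLong;      // true for L/U/u prefixes, false for u8 or none
    };

    static StrippedLiteral stripEncodingPrefix(const std::string &literal)
    {
        const std::string prefixes[4] = {"u8", "L", "U", "u"};
        for (const std::string &p : prefixes) {
            // A prefixed literal starts with the prefix followed by a quote.
            if (literal.compare(0, p.size() + 1, p + "\"") == 0 ||
                literal.compare(0, p.size() + 1, p + "'") == 0)
                return { literal.substr(p.size()), p != "u8" };
        }
        return { literal, false }; // no prefix: plain narrow literal
    }

    int main()
    {
        // Mirrors the new tests: L'c' -> 'c' (long), u8"ab" -> "ab" (not long)
        const std::string samples[4] = {"L'c'", "u8\"ab\"", "U\"cd\"", "'x'"};
        for (const std::string &lit : samples) {
            const StrippedLiteral s = stripEncodingPrefix(lit);
            std::cout << lit << " -> " << s.text
                      << (s.isLong ? "  (long)" : "") << '\n';
        }
    }

As in the diff's tok->isLong(p != "u8"), every prefix except u8 marks the
literal as long, which is why the new combine_u8strings test expects isLong()
to be false while combine_ustrings and combine_Ustrings expect true.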