diff --git a/lib/token.cpp b/lib/token.cpp
index 12366019f..58d002894 100644
--- a/lib/token.cpp
+++ b/lib/token.cpp
@@ -34,6 +34,28 @@
 #include
 #include
 
+/**
+ * Check whether str is a string or character literal delimited by q.
+ * One of the encoding prefixes u8, L, U or u may precede the opening quote.
+ * @param str token text to classify
+ * @param q quote character: '"' for string literals, '\'' for character literals
+ * @return true if str has the form [prefix] q ... q with separate opening and closing quotes
+ */
+static bool isStringCharLiteral(const std::string &str, char q)
+{
+    if (!endsWith(str, q))
+        return false;
+
+    static const std::string prefix[5] = { "", "u8", "L", "U", "u" };
+    for (const std::string & p: prefix) {
+        // The opening and the closing quote are separate characters, so a literal is
+        // longer than prefix + quote. This rejects a lone quote and tokens such as u8".
+        if (str.length() > p.length() + 1 && str.compare(0, p.length() + 1, p + q) == 0)
+            return true;
+    }
+    return false;
+}
+
 const std::list TokenImpl::mEmptyValueList;
 
 Token::Token(TokensFrontBack *tokensFrontBack) :
@@ -73,6 +95,11 @@ void Token::update_property_info()
     if (!mStr.empty()) {
         if (mStr == "true" || mStr == "false")
             tokType(eBoolean);
+        // Literals are tested before names: a prefixed literal such as u8"x" starts with a letter.
+        else if (isStringCharLiteral(mStr, '\"'))
+            tokType(eString);
+        else if (isStringCharLiteral(mStr, '\''))
+            tokType(eChar);
         else if (std::isalpha((unsigned char)mStr[0]) || mStr[0] == '_' || mStr[0] == '$') { // Name
             if (mImpl->mVarId)
                 tokType(eVariable);
@@ -80,10 +107,6 @@ void Token::update_property_info()
                 tokType(eName);
         } else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1])))
             tokType(eNumber);
-        else if (mStr.length() > 1 && mStr[0] == '"' && endsWith(mStr,'"'))
-            tokType(eString);
-        else if (mStr.length() > 1 && mStr[0] == '\'' && endsWith(mStr,'\''))
-            tokType(eChar);
         else if (mStr == "=" || mStr == "<<=" || mStr == ">>=" ||
                  (mStr.size() == 2U && mStr[1] == '=' && std::strchr("+-*/%&^|", mStr[0])))
             tokType(eAssignmentOp);
diff --git a/lib/tokenize.cpp b/lib/tokenize.cpp
index b622eab12..0084c28f4 100644
--- a/lib/tokenize.cpp
+++ b/lib/tokenize.cpp
@@ -1926,13 +1926,17 @@ void Tokenizer::combineOperators()
 
 void Tokenizer::combineStringAndCharLiterals()
 {
-    // Combine wide strings and wide characters
+    // Strip the encoding prefix (u8, L, U, u) from string and character literals
+    static const std::string prefix[4] = {"u8", "L", "U", "u"};
     for (Token *tok = list.front(); tok; tok = tok->next()) {
-        if (Token::Match(tok, "[Lu] %char%|%str%")) {
-            // Combine 'L "string"' and 'L 'c''
-            tok->str(tok->next()->str());
-            tok->deleteNext();
-            tok->isLong(true);
+        for (const std::string & p : prefix) {
+            if (((tok->tokType() == Token::eString) && (tok->str().compare(0, p.size() + 1, p + "\"") == 0)) ||
+                ((tok->tokType() == Token::eChar) && (tok->str().compare(0, p.size() + 1, p + "\'") == 0))) {
+                tok->str(tok->str().substr(p.size()));
+                // Every prefix except u8 marks the literal as long
+                tok->isLong(p != "u8");
+                break;
+            }
         }
     }
 
diff --git a/test/testsimplifytokens.cpp b/test/testsimplifytokens.cpp
index df36cbf44..24e1cdaf4 100644
--- a/test/testsimplifytokens.cpp
+++ b/test/testsimplifytokens.cpp
@@ -115,6 +115,8 @@ private:
 
         TEST_CASE(combine_wstrings);
         TEST_CASE(combine_ustrings);
+        TEST_CASE(combine_Ustrings);
+        TEST_CASE(combine_u8strings);
 
         // Simplify "not" to "!" (#345)
         TEST_CASE(not1);
@@ -507,9 +509,9 @@ private:
     }
 
     void combine_ustrings() {
-        const char code[] = "abc = u\"abc\";";
+        const char code[] = "abcd = u\"ab\" u\"cd\";";
 
-        const char expected[] = "abc = \"abc\" ;";
+        const char expected[] = "abcd = \"abcd\" ;";
 
         Tokenizer tokenizer(&settings0, this);
         std::istringstream istr(code);
@@ -519,6 +521,32 @@ private:
         ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
     }
 
+    void combine_Ustrings() {
+        const char code[] = "abcd = U\"ab\" U\"cd\";";
+
+        const char expected[] = "abcd = \"abcd\" ;";
+
+        Tokenizer tokenizer(&settings0, this);
+        std::istringstream istr(code);
+        tokenizer.tokenize(istr, "test.cpp");
+
+        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(0, false));
+        ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
+    }
+
+    void combine_u8strings() {
+        const char code[] = "abcd = u8\"ab\" u8\"cd\";";
+
+        const char expected[] = "abcd = \"abcd\" ;";
+
+        Tokenizer tokenizer(&settings0, this);
+        std::istringstream istr(code);
+        tokenizer.tokenize(istr, "test.cpp");
+
+        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(0, false));
+        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
+    }
+
     void double_plus() {
         {
             const char code1[] = "void foo( int a )\n"
diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp
index eb3179417..594948c5b 100644
--- a/test/testtokenize.cpp
+++ b/test/testtokenize.cpp
@@ -8210,6 +8210,7 @@ private:
         if (!tokenList.list.createTokens(istr,"test.cpp"))
             return "ERROR";
 
+        tokenList.combineStringAndCharLiterals();
         tokenList.combineOperators();
         tokenList.createLinks();
         tokenList.createLinks2();
@@ -8295,9 +8296,11 @@ private:
 
         ASSERT_EQUALS("a\"\"=", testAst("a=\"\""));
         ASSERT_EQUALS("a\'\'=", testAst("a=\'\'"));
-
         ASSERT_EQUALS("'X''a'>", testAst("('X' > 'a')"));
         ASSERT_EQUALS("'X''a'>", testAst("(L'X' > L'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(u'X' > u'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(U'X' > U'a')"));
+        ASSERT_EQUALS("'X''a'>", testAst("(u8'X' > u8'a')"));
 
         ASSERT_EQUALS("a0>bc/d:?", testAst("(a>0) ? (b/(c)) : d;"));
         ASSERT_EQUALS("abc/+d+", testAst("a + (b/(c)) + d;"));