Keep prefix in string and char literals (#2272)

Keeping the prefix in the token allows cppcheck to print string and char literals correctly in debug and error messages. To achieve this, some of the helper functions are moved from token.cpp to utils.h so that checks that inspect string and char literals can reuse them; this move accounts for a large part of this commit. Note that the only user-visible change is that the prefix is now included when string and char literals are printed in error messages. For example, for the code `int f() { return test.substr( 0 , 4 ) == U"Hello" ? 0 : 1 ; }` the error message now prints U"Hello" instead of "Hello".
2019-10-16 11:41:33 +02:00 · 2019-10-16 11:41:33 +02:00 · 297360920a
parent 387132389a
commit 297360920a
10 changed files with 98 additions and 63 deletions
--- a/lib/checkstring.cpp
+++ b/lib/checkstring.cpp
@ -327,9 +327,9 @@ void CheckString::incorrectStringCompareError(const Token *tok, const std::strin

 void CheckString::incorrectStringBooleanError(const Token *tok, const std::string& string)
 {
-    const bool charLiteral = string[0] == '\'';
+    const bool charLiteral = isCharLiteral(string);
    const std::string literalType = charLiteral ? "char" : "string";
-    const std::string result = (string == "\'\\0\'") ? "false" : "true";
+    const std::string result = getCharLiteral(string) == "\\0" ? "false" : "true";
    reportError(tok,
                Severity::warning,
                charLiteral ? "incorrectCharBooleanError" : "incorrectStringBooleanError",
--- a/lib/mathlib.cpp
+++ b/lib/mathlib.cpp
@ -531,8 +531,8 @@ MathLib::bigint MathLib::toLongNumber(const std::string & str)
            return static_cast<bigint>(doubleval);
    }

-    if (str[0] == '\'' && str.size() >= 3U && endsWith(str,'\'')) {
-        return characterLiteralToLongNumber(str.substr(1,str.size()-2));
+    if (isCharLiteral(str)) {
+        return characterLiteralToLongNumber(getCharLiteral(str));
    }

    if (str[0] == '-') {
@ -600,8 +600,8 @@ static double FloatHexToDoubleNumber(const std::string& str)

 double MathLib::toDoubleNumber(const std::string &str)
 {
-    if (str[0] == '\'' && str.size() >= 3U && endsWith(str,'\''))
-        return characterLiteralToLongNumber(str.substr(1,str.size()-2));
+    if (isCharLiteral(str))
+        return characterLiteralToLongNumber(getCharLiteral(str));
    if (isIntHex(str))
        return static_cast<double>(toLongNumber(str));
    // nullcheck
--- a/lib/token.cpp
+++ b/lib/token.cpp
@ -34,22 +34,6 @@
 #include <stack>
 #include <utility>

-static const std::string literal_prefix[4] = {"u8", "u", "U", "L"};
-
-static bool isStringCharLiteral(const std::string &str, char q)
-{
-
-    if (!endsWith(str, q))
-        return false;
-    if (str[0] == q && str.length() > 1)
-        return true;
-
-    for (const std::string & p: literal_prefix) {
-        if ((str.length() + 1) > p.length() && (str.compare(0, p.size() + 1, (p + q)) == 0))
-            return true;
-    }
-    return false;
-}
 const std::list<ValueFlow::Value> TokenImpl::mEmptyValueList;

 Token::Token(TokensFrontBack *tokensFrontBack) :
@ -89,9 +73,9 @@ void Token::update_property_info()
    if (!mStr.empty()) {
        if (mStr == "true" || mStr == "false")
            tokType(eBoolean);
-        else if (isStringCharLiteral(mStr, '\"'))
+        else if (isStringLiteral(mStr))
            tokType(eString);
-        else if (isStringCharLiteral(mStr, '\''))
+        else if (isCharLiteral(mStr))
            tokType(eChar);
        else if (std::isalpha((unsigned char)mStr[0]) || mStr[0] == '_' || mStr[0] == '$') { // Name
            if (mImpl->mVarId)
@ -168,17 +152,11 @@ void Token::update_property_isStandardType()

 void Token::update_property_char_string_literal()
 {
-    if (!(mTokType == Token::eString || mTokType == Token::eChar)) // Token has already been updated
+    if (mTokType != Token::eString && mTokType != Token::eChar)
        return;

-    for (const std::string & p : literal_prefix) {
-        if (((mTokType == Token::eString) && mStr.compare(0, p.size() + 1, p + "\"") == 0) ||
-            ((mTokType == Token::eChar) && (mStr.compare(0, p.size() +  1, p + "\'") == 0))) {
-            mStr = mStr.substr(p.size());
-            isLong(p != "u8");
-            break;
-        }
-    }
+    isLong(((mTokType == Token::eString) && isPrefixStringCharLiteral(mStr, '"', "L")) ||
+           ((mTokType == Token::eChar) && isPrefixStringCharLiteral(mStr, '\'', "L")));
 }

 bool Token::isUpperCaseName() const
@ -195,7 +173,7 @@ bool Token::isUpperCaseName() const
 void Token::concatStr(std::string const& b)
 {
    mStr.erase(mStr.length() - 1);
-    mStr.append(b.begin() + 1, b.end());
+    mStr.append(getStringLiteral(b) + "\"");

    update_property_info();
 }
@ -203,7 +181,7 @@ void Token::concatStr(std::string const& b)
 std::string Token::strValue() const
 {
    assert(mTokType == eString);
-    std::string ret(mStr.substr(1, mStr.length() - 2));
+    std::string ret(getStringLiteral(mStr));
    std::string::size_type pos = 0U;
    while ((pos = ret.find('\\', pos)) != std::string::npos) {
        ret.erase(pos,1U);
@ -721,8 +699,9 @@ nonneg int Token::getStrLength(const Token *tok)
    assert(tok->mTokType == eString);

    int len = 0;
-    std::string::const_iterator it = tok->str().begin() + 1U;
-    const std::string::const_iterator end = tok->str().end() - 1U;
+    const std::string str(getStringLiteral(tok->str()));
+    std::string::const_iterator it = str.begin();
+    const std::string::const_iterator end = str.end();

    while (it != end) {
        if (*it == '\\') {
@ -747,9 +726,9 @@ nonneg int Token::getStrSize(const Token *tok)
 {
    assert(tok != nullptr);
    assert(tok->tokType() == eString);
-    const std::string &str = tok->str();
+    const std::string str(getStringLiteral(tok->str()));
    int sizeofstring = 1;
-    for (int i = 1; i < (int)str.size() - 1; i++) {
+    for (int i = 0; i < (int)str.size(); i++) {
        if (str[i] == '\\')
            ++i;
        ++sizeofstring;
@ -760,9 +739,9 @@ nonneg int Token::getStrSize(const Token *tok)
 std::string Token::getCharAt(const Token *tok, MathLib::bigint index)
 {
    assert(tok != nullptr);
-
-    std::string::const_iterator it = tok->str().begin() + 1U;
-    const std::string::const_iterator end = tok->str().end() - 1U;
+    std::string str(getStringLiteral(tok->str()));
+    std::string::const_iterator it = str.begin();
+    const std::string::const_iterator end = str.end();

    while (it != end) {
        if (index == 0) {
@ -1161,9 +1140,7 @@ void Token::stringify(std::ostream& os, bool varid, bool attributes, bool macro)
        if (isComplex())
            os << "_Complex ";
        if (isLong()) {
-            if (mTokType == eString || mTokType == eChar)
-                os << "L";
-            else
+            if (!(mTokType == eString || mTokType == eChar))
                os << "long ";
        }
    }
@ -1428,8 +1405,8 @@ static std::string stringFromTokenRange(const Token* start, const Token* end)
    for (const Token *tok = start; tok && tok != end; tok = tok->next()) {
        if (tok->isUnsigned())
            ret << "unsigned ";
-        if (tok->isLong())
-            ret << (tok->isLiteral() ? "L" : "long ");
+        if (tok->isLong() && !tok->isLiteral())
+            ret << "long ";
        if (tok->originalName().empty() || tok->isUnsigned() || tok->isLong()) {
            ret << tok->str();
        } else
--- a/lib/tokenize.cpp
+++ b/lib/tokenize.cpp
@ -2484,7 +2484,7 @@ void Tokenizer::combineStringAndCharLiterals()
    for (Token *tok = list.front();
         tok;
         tok = tok->next()) {
-        if (tok->str()[0] != '"')
+        if (!isStringLiteral(tok->str()))
            continue;

        tok->str(simplifyString(tok->str()));
@ -10637,8 +10637,11 @@ void Tokenizer::simplifyMicrosoftStringFunctions()
            tok->deleteNext();
            tok->deleteThis();
            tok->deleteNext();
-            if (!ansi)
+            if (!ansi) {
                tok->isLong(true);
+                if (tok->str()[0] != 'L')
+                    tok->str("L" + tok->str());
+            }
            while (Token::Match(tok->next(), "_T|_TEXT|TEXT ( %char%|%str% )")) {
                tok->next()->deleteNext();
                tok->next()->deleteThis();
--- a/lib/utils.h
+++ b/lib/utils.h
@ -36,6 +36,56 @@ inline bool endsWith(const std::string &str, const char end[], std::size_t endle
    return (str.size() >= endlen) && (str.compare(str.size()-endlen, endlen, end)==0);
 }

+inline static bool isPrefixStringCharLiteral(const std::string &str, char q, const std::string& p)
+{
+    if (!endsWith(str, q))
+        return false;
+    if ((str.length() + 1) > p.length() && (str.compare(0, p.size() + 1, p + q) == 0))
+        return true;
+    return false;
+}
+
+inline static bool isStringCharLiteral(const std::string &str, char q)
+{
+    for (const std::string & p: {
+    "", "u8", "u", "U", "L"
+}) {
+        if (isPrefixStringCharLiteral(str, q, p))
+            return true;
+    }
+    return false;
+}
+
+inline static bool isStringLiteral(const std::string &str)
+{
+    return isStringCharLiteral(str, '"');
+}
+
+inline static bool isCharLiteral(const std::string &str)
+{
+    return isStringCharLiteral(str, '\'');
+}
+
+inline static std::string getStringCharLiteral(const std::string &str, char q)
+{
+    const std::size_t quotePos = str.find(q);
+    return str.substr(quotePos + 1U, str.size() - quotePos - 2U);
+}
+
+inline static std::string getStringLiteral(const std::string &str)
+{
+    if (isStringLiteral(str))
+        return getStringCharLiteral(str, '"');
+    return "";
+}
+
+inline static std::string getCharLiteral(const std::string &str)
+{
+    if (isCharLiteral(str))
+        return getStringCharLiteral(str, '\'');
+    return "";
+}
+
 inline static const char *getOrdinalText(int i)
 {
    if (i == 1)
--- a/test/testmathlib.cpp
+++ b/test/testmathlib.cpp
@ -286,6 +286,7 @@ private:
        ASSERT_EQUALS((int)('\x10'), MathLib::toLongNumber("'\\x10'"));
        ASSERT_EQUALS((int)('\100'), MathLib::toLongNumber("'\\100'"));
        ASSERT_EQUALS((int)('\200'), MathLib::toLongNumber("'\\200'"));
+        ASSERT_EQUALS((int)(L'A'),   MathLib::toLongNumber("L'A'"));
 #ifdef __GNUC__
        // BEGIN Implementation-specific results
        ASSERT_EQUALS((int)('AB'),    MathLib::toLongNumber("'AB'"));
@ -375,6 +376,7 @@ private:
        ASSERT_EQUALS_DOUBLE(0.0,    MathLib::toDoubleNumber("-0.0"),     0.000001);
        ASSERT_EQUALS_DOUBLE(0.0,    MathLib::toDoubleNumber("+0.0"),     0.000001);
        ASSERT_EQUALS_DOUBLE('0',    MathLib::toDoubleNumber("'0'"),      0.000001);
+        ASSERT_EQUALS_DOUBLE(L'0',   MathLib::toDoubleNumber("L'0'"),     0.000001);

        ASSERT_EQUALS_DOUBLE(192, MathLib::toDoubleNumber("0x0.3p10"), 0.000001);
        ASSERT_EQUALS_DOUBLE(5.42101e-20, MathLib::toDoubleNumber("0x1p-64"), 1e-20);
--- a/test/testsimplifytokens.cpp
+++ b/test/testsimplifytokens.cpp
@ -1804,7 +1804,7 @@ private:
    void combine_wstrings() {
        const char code[] =  "a = L\"hello \"  L\"world\" ;\n";

-        const char expected[] =  "a = \"hello world\" ;";
+        const char expected[] =  "a = L\"hello world\" ;";

        Tokenizer tokenizer(&settings0, this);
        std::istringstream istr(code);
@ -1817,33 +1817,33 @@ private:
    void combine_ustrings() {
        const char code[] =  "abcd = u\"ab\" u\"cd\";";

-        const char expected[] =  "abcd = \"abcd\" ;";
+        const char expected[] =  "abcd = u\"abcd\" ;";

        Tokenizer tokenizer(&settings0, this);
        std::istringstream istr(code);
        tokenizer.tokenize(istr, "test.cpp");

        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
+        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
    }

    void combine_Ustrings() {
        const char code[] =  "abcd = U\"ab\" U\"cd\";";

-        const char expected[] =  "abcd = \"abcd\" ;";
+        const char expected[] =  "abcd = U\"abcd\" ;";

        Tokenizer tokenizer(&settings0, this);
        std::istringstream istr(code);
        tokenizer.tokenize(istr, "test.cpp");

        ASSERT_EQUALS(expected, tokenizer.tokens()->stringifyList(nullptr, false));
-        ASSERT_EQUALS(true, tokenizer.tokens()->tokAt(2)->isLong());
+        ASSERT_EQUALS(false, tokenizer.tokens()->tokAt(2)->isLong());
    }

    void combine_u8strings() {
        const char code[] =  "abcd = u8\"ab\" u8\"cd\";";

-        const char expected[] =  "abcd = \"abcd\" ;";
+        const char expected[] =  "abcd = u8\"abcd\" ;";


        Tokenizer tokenizer(&settings0, this);
--- a/test/teststring.cpp
+++ b/test/teststring.cpp
@ -603,7 +603,7 @@ private:
        check("int f() {\n"
              "    return test.substr( 0 , 4 ) == L\"Hello\" ? 0 : 1 ;\n"
              "}");
-        ASSERT_EQUALS("[test.cpp:2]: (warning) String literal \"Hello\" doesn't match length argument for substr().\n", errout.str());
+        ASSERT_EQUALS("[test.cpp:2]: (warning) String literal L\"Hello\" doesn't match length argument for substr().\n", errout.str());

        check("int f() {\n"
              "    return test.substr( 0 , 5 ) == \"Hello\" ? 0 : 1 ;\n"
@ -688,7 +688,7 @@ private:
              "  int x = 'd' ? 1 : 2;\n"
              "}");
        ASSERT_EQUALS("[test.cpp:2]: (warning) Conversion of char literal 'a' to bool always evaluates to true.\n"
-                      "[test.cpp:3]: (warning) Conversion of char literal 'b' to bool always evaluates to true.\n"
+                      "[test.cpp:3]: (warning) Conversion of char literal L'b' to bool always evaluates to true.\n"
                      "[test.cpp:4]: (warning) Conversion of char literal 'c' to bool always evaluates to true.\n"
                      "[test.cpp:5]: (warning) Conversion of char literal 'd' to bool always evaluates to true.\n"
                      , errout.str());
@ -704,7 +704,7 @@ private:
              "  if(L'\\0' || cond){}\n"
              "}");
        ASSERT_EQUALS("[test.cpp:2]: (warning) Conversion of char literal '\\0' to bool always evaluates to false.\n"
-                      "[test.cpp:3]: (warning) Conversion of char literal '\\0' to bool always evaluates to false.\n", errout.str());
+                      "[test.cpp:3]: (warning) Conversion of char literal L'\\0' to bool always evaluates to false.\n", errout.str());
    }

    void deadStrcmp() {
--- a/test/testtoken.cpp
+++ b/test/testtoken.cpp
@ -887,7 +887,7 @@ private:
        tok.concatStr("123");

        ASSERT_EQUALS(false, tok.isBoolean());
-        ASSERT_EQUALS("tru23", tok.str());
+        ASSERT_EQUALS("tru\"", tok.str());
    }

    void isNameGuarantees1() const {
@ -990,6 +990,9 @@ private:

        givenACodeSampleToTokenize data4("return L\"a\";");
        ASSERT_EQUALS("returnL\"a\"", data4.tokens()->expressionString());
+
+        givenACodeSampleToTokenize data5("return U\"a\";");
+        ASSERT_EQUALS("returnU\"a\"", data5.tokens()->expressionString());
    }

    void hasKnownIntValue() {
--- a/test/testtokenize.cpp
+++ b/test/testtokenize.cpp
@ -7222,10 +7222,10 @@ private:
        ASSERT_EQUALS("a\"\"=", testAst("a=\"\""));
        ASSERT_EQUALS("a\'\'=", testAst("a=\'\'"));
        ASSERT_EQUALS("'X''a'>", testAst("('X' > 'a')"));
-        ASSERT_EQUALS("'X''a'>", testAst("(L'X' > L'a')"));
-        ASSERT_EQUALS("'X''a'>", testAst("(u'X' > u'a')"));
-        ASSERT_EQUALS("'X''a'>", testAst("(U'X' > U'a')"));
-        ASSERT_EQUALS("'X''a'>", testAst("(u8'X' > u8'a')"));
+        ASSERT_EQUALS("L'X'L'a'>", testAst("(L'X' > L'a')"));
+        ASSERT_EQUALS("u'X'u'a'>", testAst("(u'X' > u'a')"));
+        ASSERT_EQUALS("U'X'U'a'>", testAst("(U'X' > U'a')"));
+        ASSERT_EQUALS("u8'X'u8'a'>", testAst("(u8'X' > u8'a')"));

        ASSERT_EQUALS("a0>bc/d:?", testAst("(a>0) ? (b/(c)) : d;"));
        ASSERT_EQUALS("abc/+d+", testAst("a + (b/(c)) + d;"));