From 7c992ced4cd2f981d6df0e6585da5c5a9eded4f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Wed, 30 Aug 2023 19:35:43 +0200 Subject: [PATCH] Fixed #11901 (Performance regression: large array with strings) (#5375) Analysis has slowed down a lot when there are many strings in an array. --- .github/workflows/CI-unixish.yml | 1 + .github/workflows/CI-windows.yml | 1 + lib/astutils.cpp | 2 ++ lib/token.h | 24 ++++++++++++------ lib/valueflow.cpp | 43 +++++++++++++++++--------------- test/cli/test-other.py | 13 ++++++++++ 6 files changed, 56 insertions(+), 28 deletions(-) diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml index 10cbf319d..856229f6d 100644 --- a/.github/workflows/CI-unixish.yml +++ b/.github/workflows/CI-unixish.yml @@ -334,6 +334,7 @@ jobs: run: | python3 -m pip install pip --upgrade python3 -m pip install pytest + python3 -m pip install pytest-timeout - name: Build cppcheck run: | diff --git a/.github/workflows/CI-windows.yml b/.github/workflows/CI-windows.yml index 98df86f85..0b9155716 100644 --- a/.github/workflows/CI-windows.yml +++ b/.github/workflows/CI-windows.yml @@ -128,6 +128,7 @@ jobs: python -m pip install pip --upgrade || exit /b !errorlevel! python -m pip install pytest || exit /b !errorlevel! python -m pip install pytest-custom_exit_code || exit /b !errorlevel! + python -m pip install pytest-timeout || exit /b !errorlevel! - name: Run CMake if: false # TODO: enable diff --git a/lib/astutils.cpp b/lib/astutils.cpp index 53de61dcc..d4232de0b 100644 --- a/lib/astutils.cpp +++ b/lib/astutils.cpp @@ -1465,6 +1465,8 @@ bool isUsedAsBool(const Token* const tok, const Settings* settings) if (isForLoopCondition(tok)) return true; if (!Token::Match(parent, "%cop%")) { + if (parent->str() == "," && parent->isInitComma()) + return false; std::vector vtParents = getParentValueTypes(tok, settings); return std::any_of(vtParents.cbegin(), vtParents.cend(), [&](const ValueType& vt) { return vt.pointer == 0 && vt.type == ValueType::BOOL; diff --git a/lib/token.h b/lib/token.h index dfd601a76..e50fbec9a 100644 --- a/lib/token.h +++ b/lib/token.h @@ -684,6 +684,13 @@ public: setFlag(fIsFinalType, b); } + bool isInitComma() const { + return getFlag(fIsInitComma); + } + void isInitComma(bool b) { + setFlag(fIsInitComma, b); + } + bool isBitfield() const { return mImpl->mBits > 0; } @@ -1307,14 +1314,14 @@ private: fIsAttributePacked = (1ULL << 15), // __attribute__((packed)) fIsAttributeExport = (1ULL << 16), // __attribute__((__visibility__("default"))), __declspec(dllexport) fIsAttributeMaybeUnused = (1ULL << 17), // [[maybe_unsed]] - fIsControlFlowKeyword = (1ULL << 18), // if/switch/while/... - fIsOperatorKeyword = (1ULL << 19), // operator=, etc - fIsComplex = (1ULL << 20), // complex/_Complex type - fIsEnumType = (1ULL << 21), // enumeration type - fIsName = (1ULL << 22), - fIsLiteral = (1ULL << 23), - fIsTemplateArg = (1ULL << 24), - fIsAttributeNodiscard = (1ULL << 25), // __attribute__ ((warn_unused_result)), [[nodiscard]] + fIsAttributeNodiscard = (1ULL << 18), // __attribute__ ((warn_unused_result)), [[nodiscard]] + fIsControlFlowKeyword = (1ULL << 19), // if/switch/while/... + fIsOperatorKeyword = (1ULL << 20), // operator=, etc + fIsComplex = (1ULL << 21), // complex/_Complex type + fIsEnumType = (1ULL << 22), // enumeration type + fIsName = (1ULL << 23), + fIsLiteral = (1ULL << 24), + fIsTemplateArg = (1ULL << 25), fAtAddress = (1ULL << 26), // @ 0x4000 fIncompleteVar = (1ULL << 27), fConstexpr = (1ULL << 28), @@ -1331,6 +1338,7 @@ private: fIsAtomic = (1ULL << 39), // Is this a _Atomic declaration fIsSimplifiedTypedef = (1ULL << 40), fIsFinalType = (1ULL << 41), // Is this a type with final specifier + fIsInitComma = (1ULL << 42), // Is this comma located inside some {..}. i.e: {1,2,3,4} }; enum : uint64_t { diff --git a/lib/valueflow.cpp b/lib/valueflow.cpp index b97389fe9..6a249fb2c 100644 --- a/lib/valueflow.cpp +++ b/lib/valueflow.cpp @@ -605,7 +605,6 @@ static ValueFlow::Value truncateImplicitConversion(Token* parent, const ValueFlo static void setTokenValue(Token* tok, ValueFlow::Value value, const Settings* settings, - bool isInitList = false, SourceLocation loc = SourceLocation::current()) { // Skip setting values that are too big since its ambiguous @@ -629,7 +628,7 @@ static void setTokenValue(Token* tok, if (!parent) return; - if (!isInitList && Token::simpleMatch(parent, ",") && astIsRHS(tok)) { + if (Token::simpleMatch(parent, ",") && !parent->isInitComma() && astIsRHS(tok)) { const Token* callParent = findParent(parent, [](const Token* p) { return !Token::simpleMatch(p, ","); }); @@ -947,7 +946,7 @@ static void setTokenValue(Token* tok, if (Token::simpleMatch(parent, "-") && value2.bound == result.bound && value2.bound != ValueFlow::Value::Bound::Point) result.invertBound(); - setTokenValue(parent, result, settings, isInitList); + setTokenValue(parent, result, settings); } } } @@ -1131,7 +1130,7 @@ size_t ValueFlow::getSizeOf(const ValueType &vt, const Settings *settings) static bool getMinMaxValues(const ValueType* vt, const cppcheck::Platform& platform, MathLib::bigint& minValue, MathLib::bigint& maxValue); // Handle various constants.. -static Token * valueFlowSetConstantValue(Token *tok, const Settings *settings, bool cpp, bool isInitList = false) +static Token * valueFlowSetConstantValue(Token *tok, const Settings *settings, bool cpp) { if ((tok->isNumber() && MathLib::isInt(tok->str())) || (tok->tokType() == Token::eChar)) { try { @@ -1145,7 +1144,7 @@ static Token * valueFlowSetConstantValue(Token *tok, const Settings *settings, b ValueFlow::Value value(signedValue); if (!tok->isTemplateArg()) value.setKnown(); - setTokenValue(tok, std::move(value), settings, isInitList); + setTokenValue(tok, std::move(value), settings); } catch (const std::exception & /*e*/) { // Bad character literal } @@ -1155,17 +1154,17 @@ static Token * valueFlowSetConstantValue(Token *tok, const Settings *settings, b value.floatValue = MathLib::toDoubleNumber(tok->str()); if (!tok->isTemplateArg()) value.setKnown(); - setTokenValue(tok, std::move(value), settings, isInitList); + setTokenValue(tok, std::move(value), settings); } else if (tok->enumerator() && tok->enumerator()->value_known) { ValueFlow::Value value(tok->enumerator()->value); if (!tok->isTemplateArg()) value.setKnown(); - setTokenValue(tok, std::move(value), settings, isInitList); + setTokenValue(tok, std::move(value), settings); } else if (tok->str() == "NULL" || (cpp && tok->str() == "nullptr")) { ValueFlow::Value value(0); if (!tok->isTemplateArg()) value.setKnown(); - setTokenValue(tok, std::move(value), settings, isInitList); + setTokenValue(tok, std::move(value), settings); } else if (Token::simpleMatch(tok, "sizeof (")) { if (tok->next()->astOperand2() && !tok->next()->astOperand2()->isLiteral() && tok->next()->astOperand2()->valueType() && (tok->next()->astOperand2()->valueType()->pointer == 0 || // <- TODO this is a bailout, abort when there are array->pointer conversions @@ -1333,17 +1332,17 @@ static Token * valueFlowSetConstantValue(Token *tok, const Settings *settings, b ValueFlow::Value value(0); if (!tok->isTemplateArg()) value.setKnown(); - setTokenValue(tok->next(), std::move(value), settings, isInitList); + setTokenValue(tok->next(), std::move(value), settings); } else if (Token::Match(tok, "%name% = {") && tok->variable() && (tok->variable()->isPointer() || (tok->variable()->valueType() && tok->variable()->valueType()->isIntegral()))) { if (Token::simpleMatch(tok->tokAt(3), "}")) { ValueFlow::Value value(0); value.setKnown(); - setTokenValue(tok->tokAt(2), std::move(value), settings, isInitList); + setTokenValue(tok->tokAt(2), std::move(value), settings); } else if (tok->tokAt(2)->astOperand1() && tok->tokAt(2)->astOperand1()->hasKnownIntValue()) { ValueFlow::Value value(tok->tokAt(2)->astOperand1()->getKnownIntValue()); value.setKnown(); - setTokenValue(tok->tokAt(2), std::move(value), settings, isInitList); + setTokenValue(tok->tokAt(2), std::move(value), settings); } } return tok->next(); @@ -1351,16 +1350,8 @@ static Token * valueFlowSetConstantValue(Token *tok, const Settings *settings, b static void valueFlowNumber(TokenList &tokenlist, const Settings* settings) { - bool isInitList = false; - const Token* endInit{}; for (Token *tok = tokenlist.front(); tok;) { - if (!isInitList && tok->str() == "{" && (Token::simpleMatch(tok->astOperand1(), ",") || Token::simpleMatch(tok->astOperand2(), ","))) { - isInitList = true; - endInit = tok->link(); - } - tok = valueFlowSetConstantValue(tok, settings, tokenlist.isCPP(), isInitList); - if (isInitList && tok == endInit) - isInitList = false; + tok = valueFlowSetConstantValue(tok, settings, tokenlist.isCPP()); } if (tokenlist.isCPP()) { @@ -9466,6 +9457,18 @@ void ValueFlow::setValues(TokenList& tokenlist, for (Token* tok = tokenlist.front(); tok; tok = tok->next()) tok->clearValueFlow(); + // commas in init.. + for (Token* tok = tokenlist.front(); tok; tok = tok->next()) { + if (tok->str() != "{" || !tok->astOperand1()) + continue; + for (Token* tok2 = tok->next(); tok2 != tok->link(); tok2 = tok2->next()) { + if (tok2->link() && Token::Match(tok2, "[{[(<]")) + tok2 = tok2->link(); + else if (tok2->str() == ",") + tok2->isInitComma(true); + } + } + ValueFlowPassRunner runner{ValueFlowState{tokenlist, symboldatabase, errorLogger, settings}, timerResults}; runner.run_once({ VFA(valueFlowEnumValue(symboldatabase, settings)), diff --git a/test/cli/test-other.py b/test/cli/test-other.py index a630699a1..360637ad5 100644 --- a/test/cli/test-other.py +++ b/test/cli/test-other.py @@ -141,3 +141,16 @@ def test_progress_j(tmpdir): assert exitcode == 0 assert stdout == "Checking {} ...\n".format(test_file) assert stderr == "" + + +@pytest.mark.timeout(10) +def test_slow_array_many_strings(tmpdir): + # 11901 + # cppcheck valueflow takes a long time when analyzing a file with many strings + filename = os.path.join(tmpdir, 'hang.c') + with open(filename, 'wt') as f: + f.write("const char *strings[] = {\n") + for i in range(20000): + f.write(' "abc",\n') + f.write("};\n") + cppcheck([filename]) # should not take more than ~1 second