From 58031147b20f725215143f21617a316aab88bfed Mon Sep 17 00:00:00 2001 From: "Albert ARIBAUD (3ADEV)" Date: Fri, 11 Dec 2015 10:22:06 +0100 Subject: [PATCH] Optimize tokenizing Reorganize cppcheck.cpp to separate actual checking from tokenizing and XML dumping. This implies splitting checkFile() into three parts: checkRawTokens(), checkNormalTokens() and checkSimplifiedTokens(). --- lib/cppcheck.cpp | 251 +++++++++++++++++++++++++---------------------- lib/cppcheck.h | 22 +++-- lib/tokenize.cpp | 28 ++++-- lib/tokenize.h | 5 + 4 files changed, 174 insertions(+), 132 deletions(-) diff --git a/lib/cppcheck.cpp b/lib/cppcheck.cpp index e01a3a29c..2f34a0c05 100644 --- a/lib/cppcheck.cpp +++ b/lib/cppcheck.cpp @@ -108,7 +108,7 @@ unsigned int CppCheck::processFile(const std::string& filename, std::istream& fi return 0; } - // Run rules on this code + // Run define rules on raw code for (std::list::const_iterator it = _settings.rules.begin(); it != _settings.rules.end(); ++it) { if (it->tokenlist == "define") { Tokenizer tokenizer2(&_settings, this); @@ -146,6 +146,10 @@ unsigned int CppCheck::processFile(const std::string& filename, std::istream& fi std::set checksums; unsigned int checkCount = 0; for (std::list::const_iterator it = configurations.begin(); it != configurations.end(); ++it) { + // bail out if terminated + if (_settings.terminated()) + break; + // Check only a few configurations (default 12), after that bail out, unless --force // was used. 
if (!_settings._force && ++checkCount > _settings._maxConfigs) @@ -172,11 +176,95 @@ unsigned int CppCheck::processFile(const std::string& filename, std::istream& fi codeWithoutCfg += _settings.append(); - if (!checkFile(codeWithoutCfg, filename.c_str(), checksums, internalErrorFound)) { + Tokenizer _tokenizer(&_settings, this); + if (_settings._showtime != SHOWTIME_NONE) + _tokenizer.setTimerResults(&S_timerResults); + + try { + // Create tokens, skip rest of iteration if failed + std::istringstream istr(codeWithoutCfg); + Timer timer("Tokenizer::createTokens", _settings._showtime, &S_timerResults); + bool result = _tokenizer.createTokens(istr, filename.c_str()); + timer.Stop(); + if (!result) + continue; + + // skip rest of iteration if just checking configuration + if (_settings.checkConfiguration) + continue; + + // Check raw tokens + checkRawTokens(_tokenizer); + + // Simplify tokens into normal form, skip rest of iteration if failed + Timer timer2("Tokenizer::simplifyTokens1", _settings._showtime, &S_timerResults); + result = _tokenizer.simplifyTokens1(cfg); + timer2.Stop(); + if (!result) + continue; + + // dump xml + if (_settings.dump) { + std::ofstream fdump((filename + ".dump").c_str()); + if (fdump.is_open()) { + fdump << "" << std::endl; + fdump << "" << std::endl; + _tokenizer.dump(fdump); + fdump << "" << std::endl; + } + } + + // Skip if we already met the same simplified token list + if (_settings._force || _settings._maxConfigs > 1) { + const unsigned long long checksum = _tokenizer.list.calculateChecksum(); + if (checksums.find(checksum) != checksums.end()) + continue; + checksums.insert(checksum); + } + + // Check normal tokens + checkNormalTokens(_tokenizer); + + // simplify more if required, skip rest of iteration if failed + if (_simplify) { + // if further simplification fails then skip rest of iteration + Timer timer3("Tokenizer::simplifyTokenList2", _settings._showtime, &S_timerResults); + result = _tokenizer.simplifyTokenList2(); + 
timer3.Stop(); + if (!result) + continue; + + // Check simplified tokens + checkSimplifiedTokens(_tokenizer); + } + + } catch (const InternalError &e) { if (_settings.isEnabled("information") && (_settings.debug || _settings._verbose)) purgedConfigurationMessage(filename, cfg); + internalErrorFound=true; + std::list locationList; + ErrorLogger::ErrorMessage::FileLocation loc; + if (e.token) { + loc.line = e.token->linenr(); + const std::string fixedpath = Path::toNativeSeparators(_tokenizer.list.file(e.token)); + loc.setfile(fixedpath); + } else { + ErrorLogger::ErrorMessage::FileLocation loc2; + loc2.setfile(Path::toNativeSeparators(filename.c_str())); + locationList.push_back(loc2); + loc.setfile(_tokenizer.list.getSourceFilePath()); + } + locationList.push_back(loc); + const ErrorLogger::ErrorMessage errmsg(locationList, + Severity::error, + e.errorMessage, + e.id, + false); + + reportErr(errmsg); } } + } catch (const std::runtime_error &e) { internalError(filename, e.what()); } catch (const InternalError &e) { @@ -222,128 +310,57 @@ void CppCheck::internalError(const std::string &filename, const std::string &msg } //--------------------------------------------------------------------------- -// CppCheck - A function that checks a specified file +// CppCheck - A function that checks a raw token list //--------------------------------------------------------------------------- -bool CppCheck::checkFile(const std::string &code, const char FileName[], std::set& checksums, bool& internalErrorFound) +void CppCheck::checkRawTokens(const Tokenizer &tokenizer) { - internalErrorFound=false; - if (_settings.terminated() || _settings.checkConfiguration) - return true; + // Execute rules for "raw" code + executeRules("raw", tokenizer); +} - Tokenizer _tokenizer(&_settings, this); - if (_settings._showtime != SHOWTIME_NONE) - _tokenizer.setTimerResults(&S_timerResults); - try { - // Execute rules for "raw" code - for (std::list::const_iterator it = _settings.rules.begin(); 
it != _settings.rules.end(); ++it) { - if (it->tokenlist == "raw") { - Tokenizer tokenizer2(&_settings, this); - std::istringstream istr(code); - tokenizer2.list.createTokens(istr, FileName); - executeRules("raw", tokenizer2); - break; - } - } - - // Tokenize the file - std::istringstream istr(code); - - Timer timer("Tokenizer::tokenize", _settings._showtime, &S_timerResults); - bool result = _tokenizer.tokenize(istr, FileName, cfg); - timer.Stop(); - - if (_settings._force || _settings._maxConfigs > 1) { - const unsigned long long checksum = _tokenizer.list.calculateChecksum(); - if (checksums.find(checksum) != checksums.end()) - return false; - checksums.insert(checksum); - } - - if (!result) { - // File had syntax errors, abort - return true; - } - - // dump - if (_settings.dump) { - std::string dumpfile = std::string(FileName) + ".dump"; - std::ofstream fdump(dumpfile.c_str()); - if (fdump.is_open()) { - fdump << "" << std::endl; - fdump << "" << std::endl; - _tokenizer.dump(fdump); - fdump << "" << std::endl; - } - return true; - } - - // call all "runChecks" in all registered Check classes - for (std::list::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) { - if (_settings.terminated()) - return true; - - Timer timerRunChecks((*it)->name() + "::runChecks", _settings._showtime, &S_timerResults); - (*it)->runChecks(&_tokenizer, &_settings, this); - } - - // Analyse the tokens.. 
- for (std::list::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) { - Check::FileInfo *fi = (*it)->getFileInfo(&_tokenizer, &_settings); - if (fi != nullptr) - fileInfo.push_back(fi); - } - - executeRules("normal", _tokenizer); - - if (!_simplify) - return true; - - Timer timer3("Tokenizer::simplifyTokenList2", _settings._showtime, &S_timerResults); - result = _tokenizer.simplifyTokenList2(); - timer3.Stop(); - if (!result) - return true; - - // call all "runSimplifiedChecks" in all registered Check classes - for (std::list::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) { - if (_settings.terminated()) - return true; - - Timer timerSimpleChecks((*it)->name() + "::runSimplifiedChecks", _settings._showtime, &S_timerResults); - (*it)->runSimplifiedChecks(&_tokenizer, &_settings, this); - } +//--------------------------------------------------------------------------- +// CppCheck - A function that checks a normal token list +//--------------------------------------------------------------------------- +void CppCheck::checkNormalTokens(const Tokenizer &tokenizer) +{ + // call all "runChecks" in all registered Check classes + for (std::list::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) { if (_settings.terminated()) - return true; + return; - executeRules("simple", _tokenizer); - - if (_settings.terminated()) - return true; - } catch (const InternalError &e) { - internalErrorFound=true; - std::list locationList; - ErrorLogger::ErrorMessage::FileLocation loc; - if (e.token) { - loc.line = e.token->linenr(); - const std::string fixedpath = Path::toNativeSeparators(_tokenizer.list.file(e.token)); - loc.setfile(fixedpath); - } else { - ErrorLogger::ErrorMessage::FileLocation loc2; - loc2.setfile(Path::toNativeSeparators(FileName)); - locationList.push_back(loc2); - loc.setfile(_tokenizer.list.getSourceFilePath()); - } - locationList.push_back(loc); - const 
ErrorLogger::ErrorMessage errmsg(locationList, - Severity::error, - e.errorMessage, - e.id, - false); - - reportErr(errmsg); + Timer timerRunChecks((*it)->name() + "::runChecks", _settings._showtime, &S_timerResults); + (*it)->runChecks(&tokenizer, &_settings, this); } - return true; + + // Analyse the tokens.. + for (std::list::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) { + Check::FileInfo *fi = (*it)->getFileInfo(&tokenizer, &_settings); + if (fi != nullptr) + fileInfo.push_back(fi); + } + + executeRules("normal", tokenizer); +} + +//--------------------------------------------------------------------------- +// CppCheck - A function that checks a simplified token list +//--------------------------------------------------------------------------- + +void CppCheck::checkSimplifiedTokens(const Tokenizer &tokenizer) +{ + // call all "runSimplifiedChecks" in all registered Check classes + for (std::list::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) { + if (_settings.terminated()) + return; + + Timer timerSimpleChecks((*it)->name() + "::runSimplifiedChecks", _settings._showtime, &S_timerResults); + (*it)->runSimplifiedChecks(&tokenizer, &_settings, this); + timerSimpleChecks.Stop(); + } + + if (!_settings.terminated()) + executeRules("simple", tokenizer); } void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &tokenizer) diff --git a/lib/cppcheck.h b/lib/cppcheck.h index 5c6cf7404..3e6684c2d 100644 --- a/lib/cppcheck.h +++ b/lib/cppcheck.h @@ -145,14 +145,22 @@ private: unsigned int processFile(const std::string& filename, std::istream& fileStream); /** - * @brief Check file - * @param code - * @param FileName - * @param checksums - * @param[out] internalErrorFound will be set to true if an internal has been caught, false else - * @return false if file has been checked before, true else !? 
+ * @brief Check raw tokens + * @param tokenizer */ - bool checkFile(const std::string &code, const char FileName[], std::set& checksums, bool& internalErrorFound); + void checkRawTokens(const Tokenizer &tokenizer); + + /** + * @brief Check normal tokens + * @param tokenizer + */ + void checkNormalTokens(const Tokenizer &tokenizer); + + /** + * @brief Check simplified tokens + * @param tokenizer + */ + void checkSimplifiedTokens(const Tokenizer &tokenizer); /** * @brief Execute rules, if any diff --git a/lib/tokenize.cpp b/lib/tokenize.cpp index 94e14f97d..427558260 100644 --- a/lib/tokenize.cpp +++ b/lib/tokenize.cpp @@ -1710,23 +1710,24 @@ void Tokenizer::simplifyMulAndParens() } } -bool Tokenizer::tokenize(std::istream &code, - const char FileName[], - const std::string &configuration, - bool noSymbolDB_AST) +bool Tokenizer::createTokens(std::istream &code, + const char FileName[]) { // make sure settings specified assert(_settings); + return list.createTokens(code, Path::getRelativePath(Path::simplifyPath(FileName), _settings->_basePaths)); +} + +bool Tokenizer::simplifyTokens1(const std::string &configuration, + bool noSymbolDB_AST) +{ // Fill the map _typeSize.. 
fillTypeSizes(); _configuration = configuration; - if (!list.createTokens(code, Path::getRelativePath(Path::simplifyPath(FileName), _settings->_basePaths))) - cppcheckError(nullptr); - - if (simplifyTokenList1(FileName)) { + if (simplifyTokenList1(list.getFiles()[0].c_str())) { if (!noSymbolDB_AST) { createSymbolDatabase(); @@ -1754,6 +1755,17 @@ bool Tokenizer::tokenize(std::istream &code, } return false; } + +bool Tokenizer::tokenize(std::istream &code, + const char FileName[], + const std::string &configuration, + bool noSymbolDB_AST) +{ + if (!createTokens(code, FileName)) + return false; + + return simplifyTokens1(configuration, noSymbolDB_AST); +} //--------------------------------------------------------------------------- bool Tokenizer::tokenizeCondition(const std::string &code) diff --git a/lib/tokenize.h b/lib/tokenize.h index fd17baaa5..b374578f5 100644 --- a/lib/tokenize.h +++ b/lib/tokenize.h @@ -75,6 +75,11 @@ public: */ bool IsScopeNoReturn(const Token *endScopeToken, bool *unknown = nullptr) const; + bool createTokens(std::istream &code, + const char FileName[]); + + bool simplifyTokens1(const std::string &configuration, + bool noSymbolDB_AST = false); /** * Tokenize code * @param code input stream for code, e.g.