Optimize tokenizing

Reorganize cppcheck.cpp to separate the actual checking from
tokenizing and XML dumping. This implies splitting checkFile()
into three parts: checkRawTokens(), checkNormalTokens() and
checkSimplifiedTokens().
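
After this change, the per-configuration flow inside processFile() boils down to "create tokens, check raw tokens, simplify, check normal tokens, simplify further, check simplified tokens". The sketch below is an illustration only and not part of the diff: the helper name checkOneConfiguration() is hypothetical, and the timing, checksum deduplication, XML dumping and InternalError handling visible in the diff are omitted.

    // Hypothetical helper, for illustration only; every name other than
    // checkOneConfiguration() is taken from the diff below.
    void CppCheck::checkOneConfiguration(const std::string &codeWithoutCfg,
                                         const std::string &filename,
                                         const std::string &cfg)
    {
        Tokenizer tokenizer(&_settings, this);
        std::istringstream istr(codeWithoutCfg);

        // 1. Build the raw token list and run the "raw" rules on it.
        if (!tokenizer.createTokens(istr, filename.c_str()))
            return;
        checkRawTokens(tokenizer);

        // 2. Simplify to the normal token list, then run the registered
        //    checks and the "normal" rules.
        if (!tokenizer.simplifyTokens1(cfg))
            return;
        checkNormalTokens(tokenizer);

        // 3. Optionally simplify further and run the simplified checks
        //    and the "simple" rules.
        if (_simplify && tokenizer.simplifyTokenList2())
            checkSimplifiedTokens(tokenizer);
    }
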
Albert ARIBAUD (3ADEV) 2015-12-11 10:22:06 +01:00
parent 073d5c1db4
commit 58031147b2
4 changed files with 174 additions and 132 deletions

lib/cppcheck.cpp

@@ -108,7 +108,7 @@ unsigned int CppCheck::processFile(const std::string& filename, std::istream& fi
             return 0;
         }
 
-        // Run rules on this code
+        // Run define rules on raw code
         for (std::list<Settings::Rule>::const_iterator it = _settings.rules.begin(); it != _settings.rules.end(); ++it) {
             if (it->tokenlist == "define") {
                 Tokenizer tokenizer2(&_settings, this);
@@ -146,6 +146,10 @@ unsigned int CppCheck::processFile(const std::string& filename, std::istream& fi
         std::set<unsigned long long> checksums;
         unsigned int checkCount = 0;
         for (std::list<std::string>::const_iterator it = configurations.begin(); it != configurations.end(); ++it) {
+            // bail out if terminated
+            if (_settings.terminated())
+                break;
+
             // Check only a few configurations (default 12), after that bail out, unless --force
             // was used.
             if (!_settings._force && ++checkCount > _settings._maxConfigs)
@@ -172,11 +176,95 @@ unsigned int CppCheck::processFile(const std::string& filename, std::istream& fi
             codeWithoutCfg += _settings.append();
 
-            if (!checkFile(codeWithoutCfg, filename.c_str(), checksums, internalErrorFound)) {
+            Tokenizer _tokenizer(&_settings, this);
+            if (_settings._showtime != SHOWTIME_NONE)
+                _tokenizer.setTimerResults(&S_timerResults);
+
+            try {
+                // Create tokens, skip rest of iteration if failed
+                std::istringstream istr(codeWithoutCfg);
+                Timer timer("Tokenizer::createTokens", _settings._showtime, &S_timerResults);
+                bool result = _tokenizer.createTokens(istr, filename.c_str());
+                timer.Stop();
+                if (!result)
+                    continue;
+
+                // skip rest of iteration if just checking configuration
+                if (_settings.checkConfiguration)
+                    continue;
+
+                // Check raw tokens
+                checkRawTokens(_tokenizer);
+
+                // Simplify tokens into normal form, skip rest of iteration if failed
+                Timer timer2("Tokenizer::simplifyTokens1", _settings._showtime, &S_timerResults);
+                result = _tokenizer.simplifyTokens1(cfg);
+                timer2.Stop();
+                if (!result)
+                    continue;
+
+                // dump xml
+                if (_settings.dump) {
+                    std::ofstream fdump((filename + ".dump").c_str());
+                    if (fdump.is_open()) {
+                        fdump << "<?xml version=\"1.0\"?>" << std::endl;
+                        fdump << "<dump cfg=\"" << cfg << "\">" << std::endl;
+                        _tokenizer.dump(fdump);
+                        fdump << "</dump>" << std::endl;
+                    }
+                }
+
+                // Skip if we already met the same simplified token list
+                if (_settings._force || _settings._maxConfigs > 1) {
+                    const unsigned long long checksum = _tokenizer.list.calculateChecksum();
+                    if (checksums.find(checksum) != checksums.end())
+                        continue;
+                    checksums.insert(checksum);
+                }
+
+                // Check normal tokens
+                checkNormalTokens(_tokenizer);
+
+                // simplify more if required, skip rest of iteration if failed
+                if (_simplify) {
+                    // if further simplification fails then skip rest of iteration
+                    Timer timer3("Tokenizer::simplifyTokenList2", _settings._showtime, &S_timerResults);
+                    result = _tokenizer.simplifyTokenList2();
+                    timer3.Stop();
+                    if (!result)
+                        continue;
+
+                    // Check simplified tokens
+                    checkSimplifiedTokens(_tokenizer);
+                }
+            } catch (const InternalError &e) {
                 if (_settings.isEnabled("information") && (_settings.debug || _settings._verbose))
                     purgedConfigurationMessage(filename, cfg);
+                internalErrorFound=true;
+                std::list<ErrorLogger::ErrorMessage::FileLocation> locationList;
+                ErrorLogger::ErrorMessage::FileLocation loc;
+                if (e.token) {
+                    loc.line = e.token->linenr();
+                    const std::string fixedpath = Path::toNativeSeparators(_tokenizer.list.file(e.token));
+                    loc.setfile(fixedpath);
+                } else {
+                    ErrorLogger::ErrorMessage::FileLocation loc2;
+                    loc2.setfile(Path::toNativeSeparators(filename.c_str()));
+                    locationList.push_back(loc2);
+                    loc.setfile(_tokenizer.list.getSourceFilePath());
+                }
+                locationList.push_back(loc);
+                const ErrorLogger::ErrorMessage errmsg(locationList,
+                                                       Severity::error,
+                                                       e.errorMessage,
+                                                       e.id,
+                                                       false);
+                reportErr(errmsg);
             }
         }
     } catch (const std::runtime_error &e) {
         internalError(filename, e.what());
     } catch (const InternalError &e) {
@@ -222,128 +310,57 @@ void CppCheck::internalError(const std::string &filename, const std::string &msg
 }
 
 //---------------------------------------------------------------------------
-// CppCheck - A function that checks a specified file
+// CppCheck - A function that checks a raw token list
 //---------------------------------------------------------------------------
-bool CppCheck::checkFile(const std::string &code, const char FileName[], std::set<unsigned long long>& checksums, bool& internalErrorFound)
+void CppCheck::checkRawTokens(const Tokenizer &tokenizer)
 {
-    internalErrorFound=false;
-    if (_settings.terminated() || _settings.checkConfiguration)
-        return true;
-
-    Tokenizer _tokenizer(&_settings, this);
-    if (_settings._showtime != SHOWTIME_NONE)
-        _tokenizer.setTimerResults(&S_timerResults);
-
-    try {
-        // Execute rules for "raw" code
-        for (std::list<Settings::Rule>::const_iterator it = _settings.rules.begin(); it != _settings.rules.end(); ++it) {
-            if (it->tokenlist == "raw") {
-                Tokenizer tokenizer2(&_settings, this);
-                std::istringstream istr(code);
-                tokenizer2.list.createTokens(istr, FileName);
-                executeRules("raw", tokenizer2);
-                break;
-            }
-        }
-
-        // Tokenize the file
-        std::istringstream istr(code);
-
-        Timer timer("Tokenizer::tokenize", _settings._showtime, &S_timerResults);
-        bool result = _tokenizer.tokenize(istr, FileName, cfg);
-        timer.Stop();
-
-        if (_settings._force || _settings._maxConfigs > 1) {
-            const unsigned long long checksum = _tokenizer.list.calculateChecksum();
-            if (checksums.find(checksum) != checksums.end())
-                return false;
-            checksums.insert(checksum);
-        }
-
-        if (!result) {
-            // File had syntax errors, abort
-            return true;
-        }
-
-        // dump
-        if (_settings.dump) {
-            std::string dumpfile = std::string(FileName) + ".dump";
-            std::ofstream fdump(dumpfile.c_str());
-            if (fdump.is_open()) {
-                fdump << "<?xml version=\"1.0\"?>" << std::endl;
-                fdump << "<dump cfg=\"" << cfg << "\">" << std::endl;
-                _tokenizer.dump(fdump);
-                fdump << "</dump>" << std::endl;
-            }
-            return true;
-        }
-
-        // call all "runChecks" in all registered Check classes
-        for (std::list<Check *>::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) {
-            if (_settings.terminated())
-                return true;
-
-            Timer timerRunChecks((*it)->name() + "::runChecks", _settings._showtime, &S_timerResults);
-            (*it)->runChecks(&_tokenizer, &_settings, this);
-        }
-
-        // Analyse the tokens..
-        for (std::list<Check *>::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) {
-            Check::FileInfo *fi = (*it)->getFileInfo(&_tokenizer, &_settings);
-            if (fi != nullptr)
-                fileInfo.push_back(fi);
-        }
-
-        executeRules("normal", _tokenizer);
-
-        if (!_simplify)
-            return true;
-
-        Timer timer3("Tokenizer::simplifyTokenList2", _settings._showtime, &S_timerResults);
-        result = _tokenizer.simplifyTokenList2();
-        timer3.Stop();
-        if (!result)
-            return true;
-
-        // call all "runSimplifiedChecks" in all registered Check classes
-        for (std::list<Check *>::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) {
-            if (_settings.terminated())
-                return true;
-
-            Timer timerSimpleChecks((*it)->name() + "::runSimplifiedChecks", _settings._showtime, &S_timerResults);
-            (*it)->runSimplifiedChecks(&_tokenizer, &_settings, this);
-            timerSimpleChecks.Stop();
-        }
-
-        if (_settings.terminated())
-            return true;
-
-        executeRules("simple", _tokenizer);
-
-        if (_settings.terminated())
-            return true;
-    } catch (const InternalError &e) {
-        internalErrorFound=true;
-        std::list<ErrorLogger::ErrorMessage::FileLocation> locationList;
-        ErrorLogger::ErrorMessage::FileLocation loc;
-        if (e.token) {
-            loc.line = e.token->linenr();
-            const std::string fixedpath = Path::toNativeSeparators(_tokenizer.list.file(e.token));
-            loc.setfile(fixedpath);
-        } else {
-            ErrorLogger::ErrorMessage::FileLocation loc2;
-            loc2.setfile(Path::toNativeSeparators(FileName));
-            locationList.push_back(loc2);
-            loc.setfile(_tokenizer.list.getSourceFilePath());
-        }
-        locationList.push_back(loc);
-        const ErrorLogger::ErrorMessage errmsg(locationList,
-                                               Severity::error,
-                                               e.errorMessage,
-                                               e.id,
-                                               false);
-        reportErr(errmsg);
-    }
-    return true;
+    // Execute rules for "raw" code
+    executeRules("raw", tokenizer);
+}
+
+//---------------------------------------------------------------------------
+// CppCheck - A function that checks a normal token list
+//---------------------------------------------------------------------------
+void CppCheck::checkNormalTokens(const Tokenizer &tokenizer)
+{
+    // call all "runChecks" in all registered Check classes
+    for (std::list<Check *>::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) {
+        if (_settings.terminated())
+            return;
+
+        Timer timerRunChecks((*it)->name() + "::runChecks", _settings._showtime, &S_timerResults);
+        (*it)->runChecks(&tokenizer, &_settings, this);
+    }
+
+    // Analyse the tokens..
+    for (std::list<Check *>::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) {
+        Check::FileInfo *fi = (*it)->getFileInfo(&tokenizer, &_settings);
+        if (fi != nullptr)
+            fileInfo.push_back(fi);
+    }
+
+    executeRules("normal", tokenizer);
+}
+
+//---------------------------------------------------------------------------
+// CppCheck - A function that checks a simplified token list
+//---------------------------------------------------------------------------
+void CppCheck::checkSimplifiedTokens(const Tokenizer &tokenizer)
+{
+    // call all "runSimplifiedChecks" in all registered Check classes
+    for (std::list<Check *>::const_iterator it = Check::instances().begin(); it != Check::instances().end(); ++it) {
+        if (_settings.terminated())
+            return;
+
+        Timer timerSimpleChecks((*it)->name() + "::runSimplifiedChecks", _settings._showtime, &S_timerResults);
+        (*it)->runSimplifiedChecks(&tokenizer, &_settings, this);
+        timerSimpleChecks.Stop();
+    }
+
+    if (!_settings.terminated())
+        executeRules("simple", tokenizer);
 }
 
 void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &tokenizer)

lib/cppcheck.h

@@ -145,14 +145,22 @@ private:
     unsigned int processFile(const std::string& filename, std::istream& fileStream);
 
     /**
-     * @brief Check file
-     * @param code
-     * @param FileName
-     * @param checksums
-     * @param[out] internalErrorFound will be set to true if an internal has been caught, false else
-     * @return false if file has been checked before, true else !?
+     * @brief Check raw tokens
+     * @param tokenizer
      */
-    bool checkFile(const std::string &code, const char FileName[], std::set<unsigned long long>& checksums, bool& internalErrorFound);
+    void checkRawTokens(const Tokenizer &tokenizer);
+
+    /**
+     * @brief Check normal tokens
+     * @param tokenizer
+     */
+    void checkNormalTokens(const Tokenizer &tokenizer);
+
+    /**
+     * @brief Check simplified tokens
+     * @param tokenizer
+     */
+    void checkSimplifiedTokens(const Tokenizer &tokenizer);
 
     /**
      * @brief Execute rules, if any

lib/tokenize.cpp

@@ -1710,23 +1710,24 @@ void Tokenizer::simplifyMulAndParens()
     }
 }
 
-bool Tokenizer::tokenize(std::istream &code,
-                         const char FileName[],
-                         const std::string &configuration,
-                         bool noSymbolDB_AST)
+bool Tokenizer::createTokens(std::istream &code,
+                             const char FileName[])
 {
     // make sure settings specified
     assert(_settings);
 
+    return list.createTokens(code, Path::getRelativePath(Path::simplifyPath(FileName), _settings->_basePaths));
+}
+
+bool Tokenizer::simplifyTokens1(const std::string &configuration,
+                                bool noSymbolDB_AST)
+{
     // Fill the map _typeSize..
     fillTypeSizes();
 
     _configuration = configuration;
 
-    if (!list.createTokens(code, Path::getRelativePath(Path::simplifyPath(FileName), _settings->_basePaths)))
-        cppcheckError(nullptr);
-
-    if (simplifyTokenList1(FileName)) {
+    if (simplifyTokenList1(list.getFiles()[0].c_str())) {
 
         if (!noSymbolDB_AST) {
             createSymbolDatabase();
@@ -1754,6 +1755,17 @@ bool Tokenizer::tokenize(std::istream &code,
     }
     return false;
 }
 
+bool Tokenizer::tokenize(std::istream &code,
+                         const char FileName[],
+                         const std::string &configuration,
+                         bool noSymbolDB_AST)
+{
+    if (!createTokens(code, FileName))
+        return false;
+
+    return simplifyTokens1(configuration, noSymbolDB_AST);
+}
+
 //---------------------------------------------------------------------------
 
 bool Tokenizer::tokenizeCondition(const std::string &code)

lib/tokenize.h

@@ -75,6 +75,11 @@ public:
      */
     bool IsScopeNoReturn(const Token *endScopeToken, bool *unknown = nullptr) const;
 
+    bool createTokens(std::istream &code,
+                      const char FileName[]);
+    bool simplifyTokens1(const std::string &configuration,
+                         bool noSymbolDB_AST = false);
+
     /**
      * Tokenize code
      * @param code input stream for code, e.g.