/* * Cppcheck - A tool for static C/C++ code analysis * Copyright (C) 2007-2009 Daniel Marjamäki, Reijo Tomperi, Nicolas Le Cam, * Leandro Penz, Kimmo Varis * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see #include #include #include #include #include #include #include #include #include //--------------------------------------------------------------------------- Tokenizer::Tokenizer() { _tokens = 0; _tokensBack = 0; } Tokenizer::~Tokenizer() { DeallocateTokens(); } //--------------------------------------------------------------------------- // Helper functions.. //--------------------------------------------------------------------------- const Token *Tokenizer::tokens() const { return _tokens; } const std::vector *Tokenizer::getFiles() const { return &_files; } //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // addtoken // add a token. 
Used by 'Tokenizer' //--------------------------------------------------------------------------- void Tokenizer::addtoken(const char str[], const unsigned int lineno, const unsigned int fileno) { if (str[0] == 0) return; // Replace hexadecimal value with decimal std::ostringstream str2; if (strncmp(str, "0x", 2) == 0) { str2 << std::strtoul(str + 2, NULL, 16); } else { str2 << str; } if (_tokensBack) { _tokensBack->insertToken(str2.str().c_str()); _tokensBack = _tokensBack->next(); } else { _tokens = new Token; _tokensBack = _tokens; _tokensBack->str(str2.str().c_str()); } _tokensBack->linenr(lineno); _tokensBack->fileIndex(fileno); } //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // SizeOfType - gives the size of a type //--------------------------------------------------------------------------- int Tokenizer::SizeOfType(const char type[]) const { if (!type) return 0; std::map::const_iterator it = _typeSize.find(type); if (it == _typeSize.end()) return 0; return it->second; } //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // InsertTokens - Copy and insert tokens //--------------------------------------------------------------------------- void Tokenizer::InsertTokens(Token *dest, Token *src, unsigned int n) { while (n > 0) { dest->insertToken(src->aaaa()); dest = dest->next(); dest->fileIndex(src->fileIndex()); dest->linenr(src->linenr()); dest->varId(src->varId()); src = src->next(); --n; } } //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // Tokenize - tokenizes a given file. 
//--------------------------------------------------------------------------- void Tokenizer::tokenize(std::istream &code, const char FileName[]) { // The "_files" vector remembers what files have been tokenized.. _files.push_back(FileLister::simplifyPath(FileName)); // line number in parsed code unsigned int lineno = 1; // The current token being parsed std::string CurrentToken; // lineNumbers holds line numbers for files in fileIndexes // every time an include file is complitely parsed, last item in the vector // is removed and lineno is set to point to that value. std::vector lineNumbers; // fileIndexes holds index for _files vector about currently parsed files // every time an include file is complitely parsed, last item in the vector // is removed and FileIndex is set to point to that value. std::vector fileIndexes; // FileIndex. What file in the _files vector is read now? unsigned int FileIndex = 0; // Read one byte at a time from code and create tokens for (char ch = (char)code.get(); code.good(); ch = (char)code.get()) { // We are not handling UTF and stuff like that. Code is supposed to plain simple text. if (ch < 0) continue; if (ch == '\n') { // Add current token.. addtoken(CurrentToken.c_str(), lineno++, FileIndex); CurrentToken.clear(); continue; } // char.. if (ch == '\'') { // Add previous token addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); // Read this .. CurrentToken += ch; CurrentToken += (char)code.get(); CurrentToken += (char)code.get(); if (CurrentToken[1] == '\\') CurrentToken += (char)code.get(); // Add token and start on next.. addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } // String.. if (ch == '\"') { addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); bool special = false; char c = ch; do { // Append token.. CurrentToken += c; if (c == '\n') ++lineno; // Special sequence '\.' 
if (special) special = false; else special = (c == '\\'); // Get next character c = (char)code.get(); } while (code.good() && (special || c != '\"')); CurrentToken += '\"'; addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } if (ch == '#' && CurrentToken.empty()) { // If previous token was "#" then append this to create a "##" token if (Token::simpleMatch(_tokensBack, "#")) { _tokensBack->str("##"); continue; } std::string line("#"); { char chPrev = '#'; bool skip = false; while (code.good()) { ch = (char)code.get(); if (chPrev != '\\' && ch == '\n') break; if (chPrev == '\\') line += chPrev; if (chPrev == '#' && ch == '#') { addtoken("##", lineno, FileIndex); skip = true; break; } if (ch != ' ') chPrev = ch; if (ch != '\\' && ch != '\n') { line += ch; } if (ch == '\n') ++lineno; } if (skip) continue; } if (strncmp(line.c_str(), "#file", 5) == 0 && line.find("\"") != std::string::npos) { // Extract the filename line.erase(0, line.find("\"") + 1); if (line.find("\"") != std::string::npos) line.erase(line.find("\"")); // Has this file been tokenized already? ++lineno; bool foundOurfile = false; fileIndexes.push_back(FileIndex); for (unsigned int i = 0; i < _files.size(); i++) { if (FileLister::SameFileName(_files[i].c_str(), line.c_str())) { // Use this index foundOurfile = true; FileIndex = i; } } if (!foundOurfile) { // The "_files" vector remembers what files have been tokenized.. _files.push_back(FileLister::simplifyPath(line.c_str())); FileIndex = static_cast(_files.size() - 1); } lineNumbers.push_back(lineno); lineno = 1; continue; } else if (strncmp(line.c_str(), "#endfile", 8) == 0) { if (lineNumbers.empty() || fileIndexes.empty()) { std::cerr << "####### Preprocessor bug! 
#######\n"; std::exit(0); } lineno = lineNumbers.back(); lineNumbers.pop_back(); FileIndex = fileIndexes.back(); fileIndexes.pop_back(); continue; } else { addtoken(line.c_str(), lineno, FileIndex); } } if (strchr("#+-*/%&|^?!=<>[](){};:,.~", ch)) { if (ch == '.' && CurrentToken.length() > 0 && std::isdigit(CurrentToken[0])) { // Don't separate doubles "5.4" } else if (strchr("+-", ch) && CurrentToken.length() > 0 && std::isdigit(CurrentToken[0]) && CurrentToken[CurrentToken.length()-1] == 'e') { // Don't separate doubles "4.2e+10" } else { addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); CurrentToken += ch; // Add "++", "--" or ">>" token if ((ch == '+' || ch == '-' || ch == '>') && (code.peek() == ch)) CurrentToken += (char)code.get(); addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } } if (std::isspace(ch) || std::iscntrl(ch)) { addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } CurrentToken += ch; } addtoken(CurrentToken.c_str(), lineno, FileIndex); // Combine tokens.. for (Token *tok = _tokens; tok && tok->next(); tok = tok->next()) { static const char* combineWithNext[][3] = { { "<", "<", "<<" }, { "&", "&", "&&" }, { "|", "|", "||" }, { "+", "=", "+=" }, { "-", "=", "-=" }, { "*", "=", "*=" }, { "/", "=", "/=" }, { "&", "=", "&=" }, { "|", "=", "|=" }, { "=", "=", "==" }, { "!", "=", "!=" }, { "<", "=", "<=" }, { ">", "=", ">=" }, { ":", ":", "::" }, { "-", ">", "." }, // Replace "->" with "." { "private", ":", "private:" }, { "protected", ":", "protected:" }, { "public", ":", "public:" } }; for (unsigned ui = 0; ui < sizeof(combineWithNext) / sizeof(combineWithNext[0]); ui++) { if (tok->str() == combineWithNext[ui][0] && tok->next()->str() == combineWithNext[ui][1]) { tok->str(combineWithNext[ui][2]); tok->deleteNext(); } } } // typedef.. 
for (Token *tok = _tokens; tok;) { if (Token::Match(tok, "typedef %type% %type% ;")) { const char *type1 = tok->strAt(1); const char *type2 = tok->strAt(2); tok = const_cast(tok->tokAt(4)); for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->str() == type2) tok2->str(type1); } continue; } else if (Token::Match(tok, "typedef %type% %type% %type% ;")) { const char *type1 = tok->strAt(1); const char *type2 = tok->strAt(2); const char *type3 = tok->strAt(3); tok = const_cast(tok->tokAt(5)); for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->str() == type3) { tok2->str(type1); tok2->insertToken(type2); tok2 = tok2->next(); } } continue; } tok = tok->next(); } // Remove __asm.. for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::simpleMatch(tok->next(), "__asm {")) { while (tok->next()) { bool last = Token::simpleMatch(tok->next(), "}"); // Unlink and delete tok->next() tok->deleteNext(); // break if this was the last token to delete.. if (last) break; } } } // Remove "volatile" while (Token::simpleMatch(_tokens, "volatile")) { Token *tok = _tokens; _tokens = _tokens->next(); delete tok; } for (Token *tok = _tokens; tok; tok = tok->next()) { while (Token::simpleMatch(tok->next(), "volatile")) { tok->deleteNext(); } } // Remove "mutable" while (Token::simpleMatch(_tokens, "mutable")) { Token *tok = _tokens; _tokens = _tokens->next(); delete tok; } for (Token *tok = _tokens; tok; tok = tok->next()) { while (Token::simpleMatch(tok->next(), "mutable")) { tok->deleteNext(); } } } //--------------------------------------------------------------------------- void Tokenizer::setVarId() { // Clear all variable ids for (Token *tok = _tokens; tok; tok = tok->next()) tok->varId(0); // Set variable ids.. 
bool firstMatch; unsigned int _varId = 0; for (Token *tok = _tokens; tok; tok = tok->next()) { if (tok != _tokens && !Token::Match(tok, "[;{}(]")) continue; if (Token::Match(tok, "[;{}(] %any%")) tok = tok->next(); if (!(firstMatch = Token::Match(tok, "%type% *| %var%")) && !Token::Match(tok, "%type% %type% *| %var%")) continue; // Determine name of declared variable.. const char *varname = 0; Token *tok2 = tok->tokAt(firstMatch ? 1 : 2); while (tok2 && ! Token::Match(tok2, "[;[=(]")) { if (tok2->isName()) varname = tok2->strAt(0); else if (tok2->str() != "*") break; tok2 = tok2->next(); } // Variable declaration found => Set variable ids if (Token::Match(tok2, "[;[=]") && varname) { ++_varId; int indentlevel = 0; int parlevel = 0; bool dot = false; for (tok2 = tok->next(); tok2; tok2 = tok2->next()) { if (!dot && tok2->str() == varname) tok2->varId(_varId); else if (tok2->str() == "{") ++indentlevel; else if (tok2->str() == "}") { --indentlevel; if (indentlevel < 0) break; } else if (tok2->str() == "(") ++parlevel; else if (tok2->str() == ")") { // Is this a function parameter or a variable declared in for example a for loop? if (parlevel == 0 && indentlevel == 0 && Token::Match(tok2, ") const| {")) ; else --parlevel; } else if (parlevel < 0 && tok2->str() == ";") break; dot = bool(tok2->str() == "."); } } } // Struct/Class members for (Token *tok = _tokens; tok; tok = tok->next()) { if (tok->varId() != 0 && Token::Match(tok->next(), ". %var%") && tok->tokAt(2)->varId() == 0) { ++_varId; const std::string pattern(std::string(". 
") + tok->strAt(2)); for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->varId() == tok->varId() && Token::simpleMatch(tok2->next(), pattern.c_str())) tok2->next()->next()->varId(_varId); } } } } //--------------------------------------------------------------------------- // Simplify token list //--------------------------------------------------------------------------- void Tokenizer::simplifyTokenList() { // Combine strings for (Token *tok = _tokens; tok; tok = tok->next()) { while (tok->str()[0] == '"' && tok->next() && tok->next()->str()[0] == '"') { // Two strings after each other, combine them std::string temp = tok->str(); temp.erase(temp.length() - 1); temp.append(tok->next()->str().substr(1)); tok->str(temp.c_str()); tok->deleteNext(); } } // Remove unwanted keywords static const char* unwantedWords[] = { "unsigned", "unlikely" }; for (Token *tok = _tokens; tok; tok = tok->next()) { for (unsigned ui = 0; ui < sizeof(unwantedWords) / sizeof(unwantedWords[0]) && tok->next(); ui++) { if (tok->next()->str() == unwantedWords[ui]) { tok->deleteNext(); break; } } } // Convert + + into + and + - into - for (Token *tok = _tokens; tok; tok = tok->next()) { while (tok->next()) { if (tok->str() == "+") { if (tok->next()->str() == "+") { tok->deleteNext(); continue; } else if (tok->next()->str() == "-") { tok->str("-"); tok->deleteNext(); continue; } } else if (tok->str() == "-") { if (tok->next()->str() == "-") { tok->str("+"); tok->deleteNext(); continue; } else if (tok->next()->str() == "+") { tok->deleteNext(); continue; } } break; } } // Fill the map _typeSize.. _typeSize.clear(); _typeSize["char"] = sizeof(char); _typeSize["short"] = sizeof(short); _typeSize["int"] = sizeof(int); _typeSize["long"] = sizeof(long); _typeSize["float"] = sizeof(float); _typeSize["double"] = sizeof(double); for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::Match(tok, "class|struct %var%")) { _typeSize[tok->strAt(1)] = 100; } } // Replace 'sizeof(var)'.. 
for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::Match(tok, "[;{}] %type% %var% ;") && tok->tokAt(2)->varId() > 0) { const unsigned int varid = tok->tokAt(2)->varId(); // Replace 'sizeof(var)' with 'sizeof(type)' int indentlevel = 0; for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->str() == "{") ++indentlevel; else if (tok2->str() == "}") { --indentlevel; if (indentlevel < 0) break; } else if (Token::Match(tok2, "sizeof ( %varid% )", varid)) { tok2 = tok2->tokAt(2); tok2->str(tok->strAt(1)); } } } } // Replace 'sizeof(type)'.. for (Token *tok = _tokens; tok; tok = tok->next()) { if (tok->str() != "sizeof") continue; if (tok->strAt(1) != std::string("(")) { // Add parenthesis around the sizeof for (Token *tempToken = tok->next(); tempToken; tempToken = tempToken->next()) { if (Token::Match(tempToken, "%var%")) { if (Token::Match(tempToken->next(), ".")) { // We are checking a class or struct, search next varname tempToken = tempToken->tokAt(1); continue; } else if (Token::Match(tempToken->next(), "- >")) { // We are checking a class or struct, search next varname tempToken = tempToken->tokAt(2); continue; } else if (Token::Match(tempToken->next(), "++") || Token::Match(tempToken->next(), "--")) { // We have variable++ or variable--, there should be // nothing after this tempToken = tempToken->tokAt(2); } else if (Token::Match(tempToken->next(), "[")) { // TODO: We need to find closing ], then check for // dots and arrows "var[some[0]]->other" // But for now, just bail out break; } // Ok, we should be clean. 
Add ) after tempToken tok->insertToken("("); tempToken->insertToken(")"); break; } } } if (Token::Match(tok, "sizeof ( %type% * )")) { std::ostringstream str; // 'sizeof(type *)' has the same size as 'sizeof(char *)' str << sizeof(char *); tok->str(str.str().c_str()); for (int i = 0; i < 4; i++) { tok->deleteNext(); } } else if (Token::Match(tok, "sizeof ( %type% )")) { const char *type = tok->strAt(2); int size = SizeOfType(type); if (size > 0) { std::ostringstream str; str << size; tok->str(str.str().c_str()); for (int i = 0; i < 3; i++) { tok->deleteNext(); } } } else if (Token::Match(tok, "sizeof ( * %var% )") || Token::Match(tok, "sizeof ( %var% [ %num% ] )")) { // Some default value.. int sz = 100; unsigned int varid = tok->tokAt((tok->tokAt(2)->str() == "*") ? 3 : 2)->varId(); if (varid != 0) { // Try to locate variable declaration.. const Token *decltok = Token::findmatch(_tokens, "%type% %varid% [", varid); if (decltok) { sz = SizeOfType(decltok->strAt(0)); } } std::ostringstream ostr; ostr << sz; tok->str(ostr.str().c_str()); while (tok->next()->str() != ")") tok->deleteNext(); tok->deleteNext(); } } // Replace 'sizeof(var)' for (Token *tok = _tokens; tok; tok = tok->next()) { // type array [ num ] ; if (! Token::Match(tok, "%type% %var% [ %num% ] ;")) continue; int size = SizeOfType(tok->aaaa()); if (size <= 0) continue; const unsigned int varid = tok->next()->varId(); if (varid == 0) continue; int total_size = size * std::atoi(tok->strAt(3)); // Replace 'sizeof(var)' with number int indentlevel = 0; for (Token *tok2 = tok->tokAt(5); tok2; tok2 = tok2->next()) { if (tok2->str() == "{") { ++indentlevel; } else if (tok2->str() == "}") { --indentlevel; if (indentlevel < 0) break; } else if (Token::Match(tok2, "sizeof ( %varid% )", varid)) { std::ostringstream str; str << total_size; tok2->str(str.str().c_str()); // Delete the other tokens.. for (int i = 0; i < 3; i++) { tok2->deleteNext(); } } } } // Replace constants.. 
for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::Match(tok, "const %type% %var% = %num% ;")) { const char *sym = tok->strAt(2); const char *num = tok->strAt(4); int indent = 1; for (Token *tok2 = tok->tokAt(6); tok2; tok2 = tok2->next()) { if (tok2->str() == "{") { ++indent; } else if (tok2->str() == "}") { --indent; if (indent == 0) break; } // Compare constants, but don't touch members of other structures else if (tok2->str() == sym && tok2->previous() && tok2->previous()->str() != ".") { tok2->str(num); } } } } // Simple calculations.. for (bool done = false; !done;) { done = true; for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::simpleMatch(tok->next(), "* 1") || Token::simpleMatch(tok->next(), "1 *")) { for (int i = 0; i < 2; i++) tok->deleteNext(); done = false; } // (1-2) if (Token::Match(tok, "[[,(=<>] %num% [+-*/] %num% [],);=<>]")) { int i1 = std::atoi(tok->strAt(1)); int i2 = std::atoi(tok->strAt(3)); if (i2 == 0 && *(tok->strAt(2)) == '/') { continue; } switch (*(tok->strAt(2))) { case '+': i1 += i2; break; case '-': i1 -= i2; break; case '*': i1 *= i2; break; case '/': i1 /= i2; break; } tok = tok->next(); std::ostringstream str; str << i1; tok->str(str.str().c_str()); for (int i = 0; i < 2; i++) { tok->deleteNext(); } done = false; } // Remove parantheses around number.. if (!tok->isName() && Token::Match(tok->next(), "( %num% )")) { tok->deleteNext(); tok = tok->next(); tok->deleteNext(); done = false; } // Remove parantheses around variable.. // keep parantheses here: dynamic_cast(p); if (!tok->isName() && tok->str() != ">" && Token::Match(tok->next(), "( %var% ) [;),+-*/><]]")) { tok->deleteNext(); tok = tok->next(); tok->deleteNext(); done = false; } } } // Replace "*(str + num)" => "str[num]" for (Token *tok = _tokens; tok; tok = tok->next()) { if (! strchr(";{}(=<>", tok->aaaa0())) continue; Token *next = tok->next(); if (! 
next) break; if (Token::Match(next, "* ( %var% + %num% )")) { const char *str[4] = {"var", "[", "num", "]"}; str[0] = tok->strAt(3); str[2] = tok->strAt(5); for (int i = 0; i < 4; i++) { tok = tok->next(); tok->str(str[i]); } tok->deleteNext(); tok->deleteNext(); } } // Split up variable declarations if possible.. for (Token *tok = _tokens; tok; tok = tok->next()) { if (! Token::Match(tok, "[{};]")) continue; Token *type0 = tok->next(); if (!Token::Match(type0, "%type%")) continue; if (Token::Match(type0, "else|return")) continue; Token *tok2 = NULL; unsigned int typelen = 0; if (Token::Match(type0, "%type% %var% ,|=")) { if (type0->next()->str() != "operator") { tok2 = type0->tokAt(2); // The ',' or '=' token typelen = 1; } } else if (Token::Match(type0, "%type% * %var% ,|=")) { if (type0->next()->next()->str() != "operator") { tok2 = type0->tokAt(3); // The ',' token typelen = 1; } } else if (Token::Match(type0, "%type% %var% [ %num% ] ,|=")) { tok2 = type0->tokAt(5); // The ',' token typelen = 1; } else if (Token::Match(type0, "%type% * %var% [ %num% ] ,|=")) { tok2 = type0->tokAt(6); // The ',' token typelen = 1; } else if (Token::Match(type0, "struct %type% %var% ,|=")) { tok2 = type0->tokAt(3); typelen = 2; } else if (Token::Match(type0, "struct %type% * %var% ,|=")) { tok2 = type0->tokAt(4); typelen = 2; } if (tok2) { if (tok2->str() == ",") { tok2->str(";"); InsertTokens(tok2, type0, typelen); } else { Token *eq = tok2; int parlevel = 0; while (tok2) { if (strchr("{(", tok2->aaaa0())) { ++parlevel; } else if (strchr("})", tok2->aaaa0())) { if (parlevel < 0) break; --parlevel; } else if (parlevel == 0 && strchr(";,", tok2->aaaa0())) { // "type var =" => "type var; var =" Token *VarTok = type0->tokAt(typelen); if (VarTok->aaaa0() == '*') VarTok = VarTok->next(); InsertTokens(eq, VarTok, 2); eq->str(";"); // "= x, " => "= x; type " if (tok2->str() == ",") { tok2->str(";"); InsertTokens(tok2, type0, typelen); } break; } tok2 = tok2->next(); } } } } // In case 
variable declarations have been updated... setVarId(); // Replace NULL with 0.. for (Token *tok = _tokens; tok; tok = tok->next()) { if (tok->str() == "NULL") tok->str("0"); } // Replace pointer casts of 0.. "(char *)0" => "0" for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::Match(tok->next(), "( %type% * ) 0") || Token::Match(tok->next(), "( %type% %type% * ) 0")) { while (!Token::simpleMatch(tok->next(), "0")) tok->deleteNext(); } } simplifyIfAddBraces(); simplifyFunctionParameters(); elseif(); for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::Match(tok, "case %any% : %var%")) tok->next()->next()->insertToken(";"); if (Token::Match(tok, "default : %var%")) tok->next()->insertToken(";"); } bool modified = true; while (modified) { modified = false; modified |= simplifyConditions(); modified |= simplifyCasts(); modified |= simplifyFunctionReturn(); modified |= simplifyKnownVariables(); modified |= removeReduntantConditions(); modified |= simplifyRedundantParanthesis(); } } //--------------------------------------------------------------------------- const Token *Tokenizer::findClosing(const Token *tok, const char *start, const char *end) { if (!tok) return 0; // Find the closing "}" int indentLevel = 0; for (const Token *closing = tok->next(); closing; closing = closing->next()) { if (closing->str() == start) { ++indentLevel; continue; } if (closing->str() == end) --indentLevel; if (indentLevel >= 0) continue; // Closing } is found. 
return closing; } return 0; } bool Tokenizer::removeReduntantConditions() { bool ret = false; for (Token *tok = _tokens; tok; tok = tok->next()) { if (!Token::simpleMatch(tok, "if")) continue; if (!Token::Match(tok->tokAt(1), "( %bool% ) {")) continue; // Find matching else const Token *elseTag = 0; // Find the closing "}" elseTag = Tokenizer::findClosing(tok->tokAt(4), "{", "}"); if (elseTag) elseTag = elseTag->next(); bool boolValue = false; if (tok->tokAt(2)->str() == "true") boolValue = true; // Handle if with else if (elseTag && elseTag->str() == "else") { if (Token::simpleMatch(elseTag->next(), "if")) { // Handle "else if" if (boolValue == false) { // Convert "if( false ) {aaa;} else if() {bbb;}" => "if() {bbb;}" Token::eraseTokens(tok, elseTag->tokAt(2)); ret = true; } else { // Keep first if, remove every else if and else after it const Token *lastTagInIf = elseTag->tokAt(2); while (lastTagInIf) { if (lastTagInIf->str() == "(") { lastTagInIf = Tokenizer::findClosing(lastTagInIf, "(", ")"); lastTagInIf = lastTagInIf->next(); } lastTagInIf = Tokenizer::findClosing(lastTagInIf, "{", "}"); lastTagInIf = lastTagInIf->next(); if (!Token::simpleMatch(lastTagInIf, "else")) break; lastTagInIf = lastTagInIf->next(); if (Token::simpleMatch(lastTagInIf, "if")) lastTagInIf = lastTagInIf->next(); } Token::eraseTokens(elseTag->previous(), lastTagInIf); ret = true; } } else { // Handle else if (boolValue == false) { // Convert "if( false ) {aaa;} else {bbb;}" => "{bbb;}" or ";{bbb;}" if (tok->previous()) tok = tok->previous(); else tok->str(";"); Token::eraseTokens(tok, elseTag->tokAt(1)); } else { if (Token::simpleMatch(elseTag->tokAt(1), "{")) { // Convert "if( true ) {aaa;} else {bbb;}" => "{aaa;}" const Token *end = Tokenizer::findClosing(elseTag->tokAt(1), "{", "}"); if (!end) { // Possibly syntax error in code return false; } // Remove the "else { aaa; }" Token::eraseTokens(elseTag->previous(), end->tokAt(1)); } // Remove "if( true )" if (tok->previous()) tok = 
tok->previous(); else tok->str(";"); Token::eraseTokens(tok, tok->tokAt(5)); } ret = true; } } // Handle if without else else { if (boolValue == false) { // Remove if and its content if (tok->previous()) tok = tok->previous(); else tok->str(";"); Token::eraseTokens(tok, elseTag); } else { // convert "if( true ) {aaa;}" => "{aaa;}" if (tok->previous()) tok = tok->previous(); else tok->str(";"); Token::eraseTokens(tok, tok->tokAt(5)); } ret = true; } } return ret; } bool Tokenizer::simplifyIfAddBraces() { bool ret = false; for (Token *tok = _tokens; tok; tok = tok ? tok->next() : NULL) { if (Token::Match(tok, "if|for|while (")) { // Goto the ending ')' int parlevel = 1; tok = tok->next(); while (parlevel >= 1 && (tok = tok->next())) { if (tok->str() == "(") ++parlevel; else if (tok->str() == ")") --parlevel; } // ')' should be followed by '{' if (!tok || Token::simpleMatch(tok, ") {")) continue; } else if (tok->str() == "else") { // An else followed by an if or brace don't need to be processed further if (Token::Match(tok, "else if|{")) continue; } else { continue; } // insert open brace.. tok->insertToken("{"); tok = tok->next(); // insert close brace.. // In most cases it would work to just search for the next ';' and insert a closing brace after it. // But here are special cases.. 
// * if (cond) for (;;) break; // * if (cond1) if (cond2) { } int parlevel = 0; int indentlevel = 0; while ((tok = tok->next()) != NULL) { if (tok->str() == "{") ++indentlevel; else if (tok->str() == "}") { --indentlevel; if (indentlevel == 0) break; } else if (tok->str() == "(") ++parlevel; else if (tok->str() == ")") --parlevel; else if (indentlevel == 0 && parlevel == 0 && tok->str() == ";") break; } if (tok) { tok->insertToken("}"); ret = true; } } return ret; } bool Tokenizer::simplifyConditions() { bool ret = false; for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::simpleMatch(tok, "( true &&") || Token::simpleMatch(tok, "&& true &&") || Token::simpleMatch(tok->next(), "&& true )")) { tok->deleteNext(); tok->deleteNext(); ret = true; } else if (Token::simpleMatch(tok, "( false ||") || Token::simpleMatch(tok, "|| false ||") || Token::simpleMatch(tok->next(), "|| false )")) { tok->deleteNext(); tok->deleteNext(); ret = true; } // Change numeric constant in condition to "true" or "false" if (Token::Match(tok, "if|while ( %num%") && (tok->tokAt(3)->str() == ")" || tok->tokAt(3)->str() == "||" || tok->tokAt(3)->str() == "&&")) { tok->next()->next()->str((tok->tokAt(2)->str() != "0") ? "true" : "false"); ret = true; } Token *tok2 = tok->tokAt(2); if (tok2 && (tok->str() == "&&" || tok->str() == "||") && Token::Match(tok->next(), "%num%") && (tok2->str() == ")" || tok2->str() == "&&" || tok2->str() == "||")) { tok->next()->str((tok->next()->str() != "0") ? "true" : "false"); ret = true; } // Reduce "(%num% == %num%)" => "(true)"/"(false)" const Token *tok4 = tok->tokAt(4); if (! tok4) break; if ((tok->str() == "&&" || tok->str() == "||" || tok->str() == "(") && Token::Match(tok->tokAt(1), "%num% %any% %num%") && (tok4->str() == "&&" || tok4->str() == "||" || tok4->str() == ")")) { double op1 = (strstr(tok->strAt(1), "0x")) ? std::strtol(tok->strAt(1), 0, 16) : std::atof(tok->strAt(1)); double op2 = (strstr(tok->strAt(3), "0x")) ? 
std::strtol(tok->strAt(3), 0, 16) : std::atof(tok->strAt(3)); std::string cmp = tok->strAt(2); bool result = false; if (cmp == "==") result = (op1 == op2); else if (cmp == "!=") result = (op1 != op2); else if (cmp == ">=") result = (op1 >= op2); else if (cmp == ">") result = (op1 > op2); else if (cmp == "<=") result = (op1 <= op2); else if (cmp == "<") result = (op1 < op2); else cmp = ""; if (! cmp.empty()) { tok = tok->next(); tok->deleteNext(); tok->deleteNext(); tok->str(result ? "true" : "false"); ret = true; } } } return ret; } bool Tokenizer::simplifyCasts() { bool ret = false; for (Token *tok = _tokens; tok; tok = tok->next()) { if (!tok->isName() && Token::Match(tok->next(), "( %type% * )")) { tok->deleteNext(); tok->deleteNext(); tok->deleteNext(); tok->deleteNext(); ret = true; } else if (Token::Match(tok->next(), "dynamic_cast|reinterpret_cast|const_cast|static_cast <")) { while (tok->next() && tok->next()->str() != ">") tok->deleteNext(); tok->deleteNext(); tok->deleteNext(); Token *tok2 = tok; int parlevel = 0; while (tok2->next() && parlevel >= 0) { tok2 = tok2->next(); if (Token::simpleMatch(tok2->next(), "(")) ++parlevel; else if (Token::simpleMatch(tok2->next(), ")")) --parlevel; } if (tok2->next()) tok2->deleteNext(); ret = true; } } return ret; } bool Tokenizer::simplifyFunctionParameters() { bool ret = false; int indentlevel = 0; for (Token *tok = _tokens; tok; tok = tok->next()) { if (tok->str() == "{") ++indentlevel; else if (tok->str() == "}") --indentlevel; // Find the function e.g. 
foo( x ) or foo( x, y ) else if (indentlevel == 0 && Token::Match(tok, "%var% ( %var% [,)]")) { // We have found old style function, now we need to change it // Get list of argument names std::map argumentNames; bool bailOut = false; for (tok = tok->tokAt(2); tok; tok = tok->tokAt(2)) { if (!Token::Match(tok, "%var% [,)]")) { bailOut = true; break; } argumentNames[tok->str()] = tok; if (tok->next()->str() == ")") { tok = tok->tokAt(2); break; } } if (bailOut) { continue; } Token *start = tok; while (tok && tok->str() != "{") { if (tok->str() == ";") { tok = tok->previous(); // Move tokens from start to tok into the place of // argumentNames[tok->str()] and remove the ";" if (argumentNames.find(tok->str()) == argumentNames.end()) { bailOut = true; break; } // Remove the following ";" Token *temp = tok->tokAt(2); tok->deleteNext(); // Replace "x" with "int x" or similar Token::replace(argumentNames[tok->str()], start, tok); ret = true; tok = temp; start = tok; } else { tok = tok->next(); } } if (tok == NULL) { break; } if (bailOut) { continue; } ++indentlevel; } } return ret; }

//---------------------------------------------------------------------------
// simplifyFunctionReturn
// For every token sequence "name ( ) { return <number> ; }" found at
// indentation level 0, replace calls "name ( )" that are preceded by one of
// "(=+-*/" and followed by one of ";)+-*/" with the number.
// Returns true if at least one call was replaced.
//---------------------------------------------------------------------------
bool Tokenizer::simplifyFunctionReturn()
{
    bool ret = false;
    int indentlevel = 0;
    for (const Token *tok = tokens(); tok; tok = tok->next())
    {
        if (tok->str() == "{")
        {
            ++indentlevel;
        }
        else if (tok->str() == "}")
        {
            --indentlevel;
        }
        else if (indentlevel == 0 && Token::Match(tok, "%var% ( ) { return %num% ; }"))
        {
            // Build the call pattern once; it does not change while the
            // token list is scanned for calls.
            std::ostringstream pattern;
            pattern << "[(=+-*/] " << tok->str() << " ( ) [;)+-*/]";
            const std::string callPattern(pattern.str());

            for (Token *tok2 = _tokens; tok2; tok2 = tok2->next())
            {
                if (!Token::Match(tok2, callPattern.c_str()))
                    continue;

                // tok2 is the token in front of the call; step onto the
                // function name, overwrite it with the returned number
                // (token 5 of the definition) and drop the "(" and ")".
                tok2 = tok2->next();
                tok2->str(tok->strAt(5));
                tok2->deleteNext();
                tok2->deleteNext();
                ret = true;
            }
        }
    }

    return ret;
}

//---------------------------------------------------------------------------
// incdec
// Interpret 'value' as an integer, apply the operator 'op' ("++" or "--")
// and write the result back to 'value' as text. Any other 'op' leaves the
// parsed number unchanged. If 'value' cannot be parsed the starting number
// is whatever the stream extraction leaves in the integer (initialised to 0).
//---------------------------------------------------------------------------
static void incdec(std::string &value, const std::string &op)
{
    int number = 0;
    std::istringstream input(value.c_str());
    input >> number;

    if (op == "++")
        ++number;
    else if (op == "--")
        --number;

    std::ostringstream output;
    output << number;
    value = output.str();
}

//---------------------------------------------------------------------------
// simplifyKnownVariables
// Inside each code block that follows ") {" or ") const {", look for
// assignments "var = <number> ;" / "var = <bool> ;" of a variable with a
// non-zero variable id and replace the uses of that variable that directly
// follow with the assigned value.
// Returns true if a replacement was recorded.
//---------------------------------------------------------------------------
bool Tokenizer::simplifyKnownVariables()
{
    bool ret = false;
    for (Token *tok = _tokens; tok; tok = tok->next())
    {
        // Search for a block of code
        if (! Token::Match(tok, ") const| {"))
            continue;

        // parse the block of code..
        int indentlevel = 0;
        for (Token *tok2 = tok; tok2; tok2 = tok2->next())
        {
            if (tok2->str() == "{")
            {
                ++indentlevel;
            }
            else if (tok2->str() == "}")
            {
                // Leaving the outermost brace ends the block
                --indentlevel;
                if (indentlevel <= 0)
                    break;
            }
            else if (Token::Match(tok2, "%var% = %num% ;") ||
                     Token::Match(tok2, "%var% = %bool% ;"))
            {
                unsigned int varid = tok2->varId();
                if (varid == 0)
                    continue;

                // Current known value of the variable (as text)
                std::string value(tok2->strAt(2));

                for (Token *tok3 = tok2->next(); tok3; tok3 = tok3->next())
                {
                    // Perhaps it's a loop => bail out
                    if (Token::Match(tok3, "[{}]"))
                        break;

                    // Variable is used somehow in a non-defined pattern => bail out
                    if (tok3->varId() == varid)
                        break;

                    // Replace variable with numeric constant..
                    if (Token::Match(tok3, "if ( %varid% )", varid))
                    {
                        tok3 = tok3->next()->next();
                        tok3->str(value.c_str());
                        ret = true;
                    }

                    // Variable is used in calculation..
                    if (Token::Match(tok3, "[=+-*/[] %varid% [+-*/;]]", varid))
                    {
                        tok3 = tok3->next();
                        tok3->str(value.c_str());
                        ret = true;
                    }

                    // Postfix increment/decrement: "var ++" / "var --"
                    // NOTE(review): tokens may be changed here without
                    // setting 'ret' - confirm whether that is intended.
                    if (Token::Match(tok3->next(), "%varid% ++|--", varid))
                    {
                        tok3 = tok3->next();
                        const std::string op(tok3->strAt(1));
                        // Only substitute when the expression is not a
                        // statement of its own ("; var ++ ;")
                        if (!Token::Match(tok3->previous(), "; %any% %any% ;"))
                        {
                            tok3->str(value.c_str());
                            tok3->deleteNext();
                        }
                        incdec(value, op);
                    }

                    // Prefix increment/decrement: "++ var" / "-- var"
                    // NOTE(review): same remark about 'ret' as above.
                    if (Token::Match(tok3->next(), "++|-- %varid%", varid))
                    {
                        incdec(value, tok3->strAt(1));
                        if (!Token::Match(tok3, "; %any% %any% ;"))
                        {
                            tok3->deleteNext();
                            tok3->next()->str(value.c_str());
                        }
                        tok3 = tok3->next();
                    }
                }
            }
        }
    }

    return ret;
}

//---------------------------------------------------------------------------
// elseif
// Wrap the "if" part of "else if" in braces: a "{" is inserted after the
// "else" and a "}" after the token that ends the if-statement, provided that
// token is not followed by another "else".
// Returns true if braces were inserted.
//---------------------------------------------------------------------------
bool Tokenizer::elseif()
{
    bool ret = false;
    for (Token *tok = _tokens; tok; tok = tok->next())
    {
        if (!Token::simpleMatch(tok, "else if"))
            continue;

        // Combined nesting level of parentheses and braces
        int indent = 0;
        for (Token *tok2 = tok; indent >= 0 && tok2; tok2 = tok2->next())
        {
            if (Token::Match(tok2, "(|{"))
                ++indent;
            else if (Token::Match(tok2, ")|}"))
                --indent;

            // A "}" or ";" at nesting level 0 that is not followed by
            // "else" is where the closing brace goes
            if (indent == 0 &&
                Token::Match(tok2, "}|;") &&
                !Token::simpleMatch(tok2->next(), "else"))
            {
                tok->insertToken("{");
                tok2->insertToken("}");
                ret = true;
                break;
            }
        }
    }

    return ret;
}

//---------------------------------------------------------------------------
// simplifyRedundantParanthesis
// Remove one pair of parentheses from "( ( ... ) )".
// Returns true if at least one pair was removed.
//---------------------------------------------------------------------------
bool Tokenizer::simplifyRedundantParanthesis()
{
    bool ret = false;
    for (Token *tok = _tokens; tok; tok = tok->next())
    {
        if (!Token::simpleMatch(tok, "( ("))
            continue;

        int parlevel = 0;
        for (Token *tok2 = tok; tok2; tok2 = tok2->next())
        {
            if (tok2->str() == "(")
            {
                ++parlevel;
            }
            else if (tok2->str() == ")")
            {
                --parlevel;
                if (parlevel == 1)
                {
                    // tok2 closes the inner parenthesis; the pair is
                    // redundant only if the outer one closes right after it
                    if (Token::simpleMatch(tok2, ") )"))
                    {
                        tok->deleteNext();
                        tok2->deleteNext();
                        // Report the change to the caller
                        ret = true;
                    }
                    break;
                }
            }
        }
    }
    return ret;
}

//---------------------------------------------------------------------------
// Helper functions for handling the tokens list
//---------------------------------------------------------------------------

//---------------------------------------------------------------------------

//---------------------------------------------------------------------------
// GetFunctionTokenByName
// Look up 'funcname' in the list built by fillFunctionList().
// Returns the stored token whose string equals 'funcname', or NULL if there
// is none (or if 'funcname' is NULL).
//---------------------------------------------------------------------------
const Token *Tokenizer::GetFunctionTokenByName(const char funcname[]) const
{
    // Comparing a std::string with a NULL pointer is undefined
    if (!funcname)
        return NULL;

    for (unsigned int i = 0; i < _functionList.size(); ++i)
    {
        if (_functionList[i]->str() == funcname)
            return _functionList[i];
    }
    return NULL;
}

//---------------------------------------------------------------------------
// fillFunctionList
// Rebuild _functionList: collect the name token of every "name ( ... ) {"
// or "name ( ... ) const {" found at indentation level 0, then drop every
// name that occurs more than once.
//---------------------------------------------------------------------------
void Tokenizer::fillFunctionList()
{
    _functionList.clear();

    int indentlevel = 0;
    for (const Token *tok = _tokens; tok; tok = tok->next())
    {
        if (tok->str() == "{")
            ++indentlevel;
        else if (tok->str() == "}")
            --indentlevel;

        // Only look at tokens outside of any braces
        if (indentlevel > 0)
        {
            continue;
        }

        if (Token::Match(tok, "%var% ("))
        {
            // Check if this is the first token of a function implementation..
            for (const Token *tok2 = tok->tokAt(2); tok2; tok2 = tok2->next())
            {
                if (tok2->str() == ";")
                {
                    tok = tok2;
                    break;
                }
                else if (tok2->str() == "{")
                {
                    break;
                }
                else if (tok2->str() == ")")
                {
                    if (Token::Match(tok2, ") const| {"))
                    {
                        _functionList.push_back(tok);
                        tok = tok2;
                    }
                    else
                    {
                        // Skip ahead until the next token starts with ';' or '{'
                        // NOTE(review): aaaa0() is assumed to yield the first
                        // character of the token text - confirm.
                        tok = tok2;
                        while (tok->next() && !strchr(";{", tok->next()->aaaa0()))
                            tok = tok->next();
                    }
                    break;
                }
            }
        }
    }

    // If _functionList contains functions with duplicate names, remove all
    // of them (the first occurrence as well as the later ones)
    // TODO this will need some better handling
    for (unsigned int func1 = 0; func1 < _functionList.size();)
    {
        bool hasDuplicates = false;
        for (unsigned int func2 = func1 + 1; func2 < _functionList.size();)
        {
            if (_functionList[func1]->str() == _functionList[func2]->str())
            {
                hasDuplicates = true;
                _functionList.erase(_functionList.begin() + func2);
            }
            else
            {
                ++func2;
            }
        }

        if (! hasDuplicates)
        {
            ++func1;
        }
        else
        {
            // Erasing shifts the next candidate into position func1
            _functionList.erase(_functionList.begin() + func1);
        }
    }
}

//---------------------------------------------------------------------------

// Deallocate lists..
void Tokenizer::DeallocateTokens() { deleteTokens(_tokens); _tokens = 0; _tokensBack = 0; _files.clear(); } void Tokenizer::deleteTokens(Token *tok) { while (tok) { Token *next = tok->next(); delete tok; tok = next; } } //--------------------------------------------------------------------------- const char *Tokenizer::getParameterName(const Token *ftok, int par) { int _par = 1; for (; ftok; ftok = ftok->next()) { if (ftok->str() == ",") ++_par; if (par == _par && Token::Match(ftok, "%var% [,)]")) return ftok->aaaa(); } return NULL; } //--------------------------------------------------------------------------- std::string Tokenizer::fileLine(const Token *tok) const { std::ostringstream ostr; ostr << "[" << _files.at(tok->fileIndex()) << ":" << tok->linenr() << "]"; return ostr.str(); } std::string Tokenizer::file(const Token *tok) const { return _files.at(tok->fileIndex()); } //---------------------------------------------------------------------------