// cppcheck/lib/tokenize.h
/*
* Cppcheck - A tool for static C/C++ code analysis
* Copyright (C) 2007-2023 Cppcheck team.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
#ifndef tokenizeH
#define tokenizeH
//---------------------------------------------------------------------------
#include "config.h"
#include "tokenlist.h"

#include <cassert>
#include <iosfwd>
#include <list>
#include <map>
#include <string>
#include <vector>
// Forward declarations: these types are only named by pointer/reference
// (or as an opaque enum) in this header, so full definitions are not needed.
class Settings;
class SymbolDatabase;
class TimerResults;
class Token;
class TemplateSimplifier;
class ErrorLogger;
class Preprocessor;
class VariableMap;
enum class Severity;

namespace simplecpp {
    class TokenList;
}
/// @addtogroup Core
/// @{
/** @brief The main purpose is to tokenize the source code. It also has functions that simplify the token list */
class CPPCHECKLIB Tokenizer {
friend class TestSimplifyTokens;
friend class TestSimplifyTypedef;
friend class TestSimplifyUsing;
friend class TestTokenizer;
friend class SymbolDatabase;
friend class TestSimplifyTemplate;
friend class TemplateSimplifier;
2018-05-29 18:41:05 +02:00
public:
explicit Tokenizer(const Settings * settings, ErrorLogger *errorLogger = nullptr, const Preprocessor *preprocessor = nullptr);
~Tokenizer();
2014-11-20 14:20:09 +01:00
void setTimerResults(TimerResults *tr) {
2018-06-16 22:14:59 +02:00
mTimerResults = tr;
}
/** Is the code C. Used for bailouts */
2014-11-20 14:20:09 +01:00
bool isC() const {
return list.isC();
}
/** Is the code CPP. Used for bailouts */
2014-11-20 14:20:09 +01:00
bool isCPP() const {
return list.isCPP();
}
/**
* Check if inner scope ends with a call to a noreturn function
* \param endScopeToken The '}' token
* \param unknown set to true if it's unknown if the scope is noreturn
* \return true if scope ends with a function call that might be 'noreturn'
*/
bool isScopeNoReturn(const Token *endScopeToken, bool *unknown = nullptr) const;
bool createTokens(std::istream &code, const std::string& FileName);
void createTokens(simplecpp::TokenList&& tokenList);
2016-07-25 12:12:11 +02:00
bool simplifyTokens1(const std::string &configuration);
/**
* Tokenize code
* @param code input stream for code, e.g.
* \code
* #file "p.h"
* class Foo
* {
* private:
* void Bar();
* };
*
* #endfile
* void Foo::Bar()
* {
* }
* \endcode
*
* @param FileName The filename
* @param configuration E.g. "A" for code where "#ifdef A" is true
2010-12-30 22:13:31 +01:00
* @return false if source code contains syntax errors
*/
bool tokenize(std::istream &code,
const char FileName[],
2016-07-25 12:12:11 +02:00
const std::string &configuration = emptyString);
/** Set variable id */
void setVarId();
void setVarIdPass1();
void setVarIdPass2();
/**
2021-08-07 20:51:18 +02:00
* Basic simplification of tokenlist
*
* @param FileName The filename to run; used to do
* markup checks.
*
* @return false if there is an error that requires aborting
* the checking of this file.
*/
bool simplifyTokenList1(const char FileName[]);
2019-03-02 19:52:15 +01:00
/**
* If --check-headers=no has been given; then remove unneeded code in headers.
* - All executable code.
* - Unused types/variables/etc
*/
void simplifyHeadersAndUnusedTemplates();
2019-03-02 19:52:15 +01:00
/**
* Remove extra "template" keywords that are not used by Cppcheck
*/
void removeExtraTemplateKeywords();
/** Split up template right angle brackets.
* foo < bar < >> => foo < bar < > >
*/
void splitTemplateRightAngleBrackets(bool check);
/**
* Calculates sizeof value for given type.
* @param type Token which will contain e.g. "int", "*", or string.
* @return sizeof for given type, or 0 if it can't be calculated.
*/
nonneg int sizeOfType(const Token* type) const;
nonneg int sizeOfType(const std::string& type) const;
void simplifyDebug();
/** Simplify assignment where rhs is a block : "x=({123;});" => "{x=123;}" */
void simplifyAssignmentBlock();
/** Insert array size where it isn't given */
void arraySize();
/** Simplify labels and 'case|default' syntaxes.
2021-08-07 20:51:18 +02:00
*/
void simplifyLabelsCaseDefault();
/** simplify case ranges (gcc extension)
2021-08-07 20:51:18 +02:00
*/
void simplifyCaseRange();
/** Remove macros in global scope */
void removeMacrosInGlobalScope();
void addSemicolonAfterUnknownMacro();
// Remove C99 and CPP11 _Pragma(str)
void removePragma();
/** Remove undefined macro in class definition:
2021-08-07 20:51:18 +02:00
* class DLLEXPORT Fred { };
* class Fred FINAL : Base { };
*/
void removeMacroInClassDef();
/** Add parentheses for sizeof: sizeof x => sizeof(x) */
void sizeofAddParentheses();
/**
* Simplify variable declarations (split up)
* \param only_k_r_fpar Only simplify K&R function parameters
*/
void simplifyVarDecl(const bool only_k_r_fpar);
void simplifyVarDecl(Token * tokBegin, const Token * const tokEnd, const bool only_k_r_fpar); // cppcheck-suppress functionConst // has side effects
/**
* Simplify variable initialization
* '; int *p(0);' => '; int *p = 0;'
*/
void simplifyInitVar();
static Token* initVar(Token* tok);
/**
* Simplify the location of "static" and "const" qualifiers in
* a variable declaration or definition.
* Example: "int static const a;" => "static const a;"
* Example: "long long const static b;" => "static const long long b;"
*/
void simplifyStaticConst();
/**
2010-12-15 18:45:53 +01:00
* Simplify multiple assignments.
* Example: "a = b = c = 0;" => "a = 0; b = 0; c = 0;"
*/
void simplifyVariableMultipleAssign();
/**
* Simplify the 'C Alternative Tokens'
* Examples:
* "if(s and t)" => "if(s && t)"
* "while((r bitand s) and not t)" => while((r & s) && !t)"
* "a and_eq b;" => "a &= b;"
*/
bool simplifyCAlternativeTokens();
/** Add braces to an if-block, for-block, etc.
* @return true if no syntax errors
*/
bool simplifyAddBraces();
/** Add braces to an if-block, for-block, etc.
* for command starting at token including else-block
* @return last token of command
* or input token in case of an error where no braces are added
* or NULL when syntaxError is called
*/
Token * simplifyAddBracesToCommand(Token * tok);
/** Add pair of braces to an single if-block, else-block, for-block, etc.
* for command starting at token
* @return last token of command
* or input token in case of an error where no braces are added
* or NULL when syntaxError is called
*/
Token * simplifyAddBracesPair(Token *tok, bool commandWithCondition);
// Convert "using ...;" to corresponding typedef
void simplifyUsingToTypedef();
/**
* typedef A mytype;
* mytype c;
*
* Becomes:
* typedef A mytype;
* A c;
*/
void simplifyTypedef();
void simplifyTypedefCpp();
/**
* Move typedef token to the left og the expression
*/
void simplifyTypedefLHS();
/**
*/
bool isMemberFunction(const Token *openParen) const;
/**
*/
bool simplifyUsing();
void simplifyUsingError(const Token* usingStart, const Token* usingEnd);
/** Simplify useless C++ empty namespaces, like: 'namespace %name% { }'*/
void simplifyEmptyNamespaces();
/** Simplify "if else" */
void elseif();
/** Simplify C++17/C++20 if/switch/for initialization expression */
void simplifyIfSwitchForInit();
2010-10-27 10:34:06 +02:00
/**
* Reduces "; ;" to ";", except in "( ; ; )"
*/
void removeRedundantSemicolons();
/** Struct simplification
* "struct S { } s;" => "struct S { }; S s;"
*/
void simplifyStructDecl();
/**
* Remove redundant parentheses:
* - "((x))" => "(x)"
* - "(function())" => "function()"
* - "(delete x)" => "delete x"
* - "(delete [] x)" => "delete [] x"
* @return true if modifications to token-list are done.
* false if no modifications are done.
*/
bool simplifyRedundantParentheses();
/**
* Simplify functions like "void f(x) int x; {"
* into "void f(int x) {"
*/
void simplifyFunctionParameters();
/** Simplify function level try blocks:
* Convert "void f() try {} catch (int) {}"
* to "void f() { try {} catch (int) {} }"
*/
void simplifyFunctionTryCatch();
/**
* Simplify templates
*/
void simplifyTemplates();
void simplifyDoublePlusAndDoubleMinus();
void simplifyRedundantConsecutiveBraces();
void simplifyArrayAccessSyntax();
void simplifyParameterVoid();
2013-03-02 18:19:53 +01:00
void fillTypeSizes();
void combineOperators();
void combineStringAndCharLiterals();
2013-03-02 18:19:53 +01:00
void concatenateNegativeNumberAndAnyPositive();
void simplifyExternC();
void simplifyRoundCurlyParentheses();
void simplifyTypeIntrinsics();
void simplifySQL();
void checkForEnumsWithTypedef();
void findComplicatedSyntaxErrorsInTemplates();
/**
* Modify strings in the token list by replacing hex and oct
* values. E.g. "\x61" -> "a" and "\000" -> "\0"
* @param source The string to be modified, e.g. "\x61"
* @return Modified string, e.g. "a"
*/
static std::string simplifyString(const std::string &source);
/**
* is token pointing at function head?
* @param tok A '(' or ')' token in a possible function head
* @param endsWith string after function head
* @return token matching with endsWith if syntax seems to be a function head else nullptr
*/
const Token * isFunctionHead(const Token *tok, const std::string &endsWith) const;
/**
* is token pointing at function head?
* @param tok A '(' or ')' token in a possible function head
* @param endsWith string after function head
* @param cpp c++ code
* @return token matching with endsWith if syntax seems to be a function head else nullptr
*/
static const Token * isFunctionHead(const Token *tok, const std::string &endsWith, bool cpp);
const Preprocessor *getPreprocessor() const {
assert(mPreprocessor);
return mPreprocessor;
}
bool hasIfdef(const Token *start, const Token *end) const;
private:
/** Simplify pointer to standard type (C only) */
void simplifyPointerToStandardType();
2010-01-20 21:19:06 +01:00
/** Simplify function pointers */
void simplifyFunctionPointers();
/**
* Send error message to error logger about internal bug.
2010-01-17 16:38:28 +01:00
* @param tok the token that this bug concerns.
*/
NORETURN void cppcheckError(const Token *tok) const;
/**
* Setup links for tokens so that one can call Token::link().
*/
void createLinks();
/**
* Setup links between < and >.
*/
void createLinks2();
public:
/** Syntax error */
NORETURN void syntaxError(const Token *tok, const std::string &code = emptyString) const;
/** Syntax error. Unmatched character. */
2020-06-29 22:54:51 +02:00
NORETURN void unmatchedToken(const Token *tok) const;
/** Syntax error. C++ code in C file. */
2020-06-29 22:54:51 +02:00
NORETURN void syntaxErrorC(const Token *tok, const std::string &what) const;
2018-11-13 16:49:02 +01:00
/** Warn about unknown macro(s), configuration is recommended */
2020-06-29 22:54:51 +02:00
NORETURN void unknownMacroError(const Token *tok1) const;
2018-11-13 16:49:02 +01:00
void unhandledCharLiteral(const Token *tok, const std::string& msg) const;
private:
/** Report that there is an unhandled "class x y {" code */
void unhandled_macro_class_x_y(const Token *tok) const;
/** Check configuration (unknown macros etc) */
void checkConfiguration() const;
void macroWithSemicolonError(const Token *tok, const std::string &macroName) const;
/**
* Is there C++ code in C file?
*/
void validateC() const;
/**
* assert that tokens are ok - used during debugging for example
* to catch problems in simplifyTokenList1/2.
*/
void validate() const;
/** Detect unknown macros and throw unknownMacro */
void reportUnknownMacros() const;
/** Detect garbage code and call syntaxError() if found. */
void findGarbageCode() const;
/** Detect garbage expression */
static bool isGarbageExpr(const Token *start, const Token *end, bool allowSemicolon);
/**
* Remove __declspec()
*/
void simplifyDeclspec();
/**
* Remove calling convention
*/
void simplifyCallingConvention();
/**
2018-12-30 19:31:50 +01:00
* Remove \__attribute\__ ((?))
*/
void simplifyAttribute();
/** Get function token for a attribute */
Token* getAttributeFuncTok(Token* tok, bool gccattr) const;
/**
* Remove \__cppcheck\__ ((?))
*/
void simplifyCppcheckAttribute();
2021-04-22 19:15:22 +02:00
/** Simplify c++20 spaceship operator */
void simplifySpaceshipOperator();
/**
* Remove keywords "volatile", "inline", "register", and "restrict"
*/
void simplifyKeyword();
/**
* Remove __asm
*/
void simplifyAsm();
2015-06-17 09:09:23 +02:00
/**
* asm heuristics, Put ^{} statements in asm()
*/
void simplifyAsm2();
/**
2018-12-30 19:31:50 +01:00
* Simplify \@&hellip; (compiler extension)
*/
void simplifyAt();
/**
* Simplify bitfields - the field width is removed as we don't use it.
*/
void simplifyBitfields();
/**
* Remove unnecessary member qualification
*/
void removeUnnecessaryQualification();
/**
* Add std:: in front of std classes, when using namespace std; was given
*/
void simplifyNamespaceStd();
/**
2021-08-07 20:51:18 +02:00
* Convert Microsoft memory functions
* CopyMemory(dst, src, len) -> memcpy(dst, src, len)
* FillMemory(dst, len, val) -> memset(dst, val, len)
* MoveMemory(dst, src, len) -> memmove(dst, src, len)
* ZeroMemory(dst, len) -> memset(dst, 0, len)
*/
void simplifyMicrosoftMemoryFunctions();
/**
2021-08-07 20:51:18 +02:00
* Convert Microsoft string functions
* _tcscpy -> strcpy
*/
void simplifyMicrosoftStringFunctions();
/**
2021-08-07 20:51:18 +02:00
* Remove Borland code
*/
void simplifyBorland();
/**
* Collapse operator name tokens into single token
* operator = => operator=
*/
void simplifyOperatorName();
2020-09-06 21:02:06 +02:00
/** simplify overloaded operators: 'obj(123)' => 'obj . operator() ( 123 )' */
void simplifyOverloadedOperators();
2015-05-10 12:35:47 +02:00
/**
2021-08-07 20:51:18 +02:00
* Remove [[attribute]] (C++11 and later) from TokenList
*/
void simplifyCPPAttribute();
2015-05-10 12:35:47 +02:00
/**
* Convert namespace aliases
*/
void simplifyNamespaceAliases();
/**
* Convert C++17 style nested namespace to older style
*/
void simplifyNestedNamespace();
2021-04-18 19:42:22 +02:00
/**
* Simplify coroutines - just put parentheses around arguments for
* co_* keywords so they can be handled like function calls in data
* flow.
*/
void simplifyCoroutines();
/**
2021-08-07 20:51:18 +02:00
* Prepare ternary operators with parentheses so that the AST can be created
* */
void prepareTernaryOpForAST();
/**
* report error message
*/
void reportError(const Token* tok, const Severity severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
void reportError(const std::list<const Token*>& callstack, Severity severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
bool duplicateTypedef(Token **tokPtr, const Token *name, const Token *typeDef) const;
2010-12-04 15:49:25 +01:00
void unsupportedTypedef(const Token *tok) const;
void setVarIdClassDeclaration(Token* const startToken, // cppcheck-suppress functionConst // has side effects
VariableMap& variableMap,
const nonneg int scopeStartVarId,
std::map<nonneg int, std::map<std::string, nonneg int>>& structMembers);
void setVarIdStructMembers(Token **tok1,
std::map<nonneg int, std::map<std::string, nonneg int>>& structMembers,
nonneg int &varId) const;
void setVarIdClassFunction(const std::string &classname, // cppcheck-suppress functionConst // has side effects
Token * const startToken,
const Token * const endToken,
const std::map<std::string, nonneg int> &varlist,
std::map<nonneg int, std::map<std::string, nonneg int>>& structMembers,
nonneg int &varId_);
/**
* Output list of unknown types.
*/
void printUnknownTypes() const;
/** Find end of SQL (or PL/SQL) block */
static const Token *findSQLBlockEnd(const Token *tokSQLStart);
bool operatorEnd(const Token * tok) const;
public:
2014-11-20 14:20:09 +01:00
const SymbolDatabase *getSymbolDatabase() const {
return mSymbolDatabase;
}
void createSymbolDatabase();
/** print --debug output if debug flags match the simplification:
* 0=unknown/both simplifications
* 1=1st simplifications
* 2=2nd simplifications
*/
void printDebugOutput(int simplification) const;
2014-07-14 15:51:45 +02:00
void dump(std::ostream &out) const;
Token *deleteInvalidTypedef(Token *typeDef);
/**
* Get variable count.
* @return number of variables
*/
nonneg int varIdCount() const {
2018-06-16 16:38:50 +02:00
return mVarId;
}
/**
* Token list: stores all tokens.
*/
TokenList list;
// Implement tokens() as a wrapper for convenience when using the TokenList
2014-11-20 14:20:09 +01:00
const Token* tokens() const {
return list.front();
}
Token* tokens() {
return list.front();
}
/**
2021-08-07 20:51:18 +02:00
* Helper function to check whether number is one (1 or 0.1E+1 or 1E+0) or not?
* @param s the string to check
* @return true in case is is one and false otherwise.
*/
static bool isOneNumber(const std::string &s);
/**
2021-08-07 20:51:18 +02:00
* Helper function to check for start of function execution scope.
* Do not use this in checks. Use the symbol database.
* @param tok pointer to end parentheses of parameter list
* @return pointer to start brace of function scope or nullptr if not start.
*/
static const Token * startOfExecutableScope(const Token * tok);
const Settings *getSettings() const {
return mSettings;
}
void calculateScopes();
2018-05-28 12:44:18 +02:00
/** Disable copy constructor */
Tokenizer(const Tokenizer &) = delete;
2018-05-28 12:44:18 +02:00
/** Disable assignment operator */
Tokenizer &operator=(const Tokenizer &) = delete;
private:
const Token *processFunc(const Token *tok2, bool inOperator) const;
Token *processFunc(Token *tok2, bool inOperator);
/**
2021-08-07 20:51:18 +02:00
* Get new variable id.
* @return new variable id
*/
nonneg int newVarId() {
2018-06-16 16:38:50 +02:00
return ++mVarId;
}
/** Set pod types */
void setPodTypes();
2011-01-01 11:26:48 +01:00
/** settings */
const Settings * const mSettings;
2011-01-01 11:26:48 +01:00
/** errorlogger */
ErrorLogger* const mErrorLogger;
/** Symbol database that all checks etc can use */
SymbolDatabase* mSymbolDatabase{};
TemplateSimplifier * const mTemplateSimplifier;
/** E.g. "A" for code where "#ifdef A" is true. This is used to
print additional information in error situations. */
std::string mConfiguration;
/** sizeof information for known types */
std::map<std::string, int> mTypeSize;
2021-07-07 10:58:13 +02:00
struct TypedefInfo {
std::string name;
std::string filename;
int lineNumber;
2021-07-07 15:16:53 +02:00
int column;
2021-07-07 10:58:13 +02:00
bool used;
};
std::vector<TypedefInfo> mTypedefInfo;
/** variable count */
nonneg int mVarId{};
2018-06-07 08:33:32 +02:00
/** unnamed count "Unnamed0", "Unnamed1", "Unnamed2", ... */
nonneg int mUnnamedCount{};
/**
* TimerResults
*/
TimerResults* mTimerResults{};
2016-02-11 16:10:52 +01:00
const Preprocessor * const mPreprocessor;
};
/// @}
//---------------------------------------------------------------------------
#endif // tokenizeH