/*
 * Cppcheck - A tool for static C/C++ code analysis
 * Copyright (C) 2007-2019 Cppcheck team.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "clangastdump.h"
#include "symboldatabase.h"
#include "tokenize.h"

#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Node type names, compared against the node type read from each dump line.
static const std::string BinaryOperator = "BinaryOperator";
static const std::string CallExpr = "CallExpr";
static const std::string CompoundStmt = "CompoundStmt";
static const std::string DeclRefExpr = "DeclRefExpr";
static const std::string FunctionDecl = "FunctionDecl";
static const std::string IfStmt = "IfStmt";
static const std::string ImplicitCastExpr = "ImplicitCastExpr";
static const std::string IntegerLiteral = "IntegerLiteral";
static const std::string ParmVarDecl = "ParmVarDecl";
static const std::string ReturnStmt = "ReturnStmt";
static const std::string UnaryOperator = "UnaryOperator";
static const std::string VarDecl = "VarDecl";

/**
 * Remove the first and last character of s when s starts with a single
 * quote; otherwise return s unchanged.
 */
static std::string unquote(const std::string &s)
{
    return (s[0] == '\'') ? s.substr(1, s.size() - 2) : s;
}

/**
 * Split a line into tokens.
 *
 * Tokens are separated by spaces, except that a token starting with '<'
 * extends to the next '>' and a token starting with a single quote extends
 * to the next single quote (delimiters are kept in the token). If such a
 * closing delimiter is missing, the remainder of the line is one token.
 */
static std::vector<std::string> splitString(const std::string &line)
{
    std::vector<std::string> ret;
    std::string::size_type pos1 = line.find_first_not_of(" ");
    while (pos1 != std::string::npos) {
        // Index of the last character that belongs to the current token
        std::string::size_type last;
        if (line[pos1] == '<')
            last = line.find(">", pos1);
        else if (line[pos1] == '\'')
            last = line.find("\'", pos1 + 1);
        else {
            const std::string::size_type space = line.find(" ", pos1);
            last = (space == std::string::npos) ? std::string::npos : space - 1;
        }
        if (last == std::string::npos) {
            // No terminator: the rest of the line is the final token
            ret.push_back(line.substr(pos1));
            break;
        }
        ret.push_back(line.substr(pos1, last + 1 - pos1));
        pos1 = line.find_first_not_of(" ", last + 1);
    }
    return ret;
}

namespace clangastdump {
    /** State shared by all AST nodes while tokens are being created. */
    struct Data {
        /** A declared variable: its name token and its Variable object. */
        struct Decl {
            Decl(Token *def, Variable *var) : def(def), var(var) {}
            Token *def;
            Variable *var;
        };

        SymbolDatabase *mSymbolDatabase = nullptr;

        /**
         * Register a variable declaration under the key addr.
         * Assigns the next variable id to def, binds def to var and gives
         * var a signed int value type. An existing entry for addr is kept.
         */
        void varDecl(const std::string &addr, Token *def, Variable *var) {
            mDeclMap.emplace(addr, Decl(def, var));
            def->varId(++mVarId);
            def->variable(var);
            var->setValueType(ValueType(ValueType::Sign::SIGNED, ValueType::Type::INT, 0));
        }

        /**
         * If a declaration was registered under addr, give tok that
         * declaration's id and Variable. Otherwise tok is left untouched.
         */
        void ref(const std::string &addr, Token *tok) {
            const auto it = mDeclMap.find(addr);
            if (it != mDeclMap.end()) {
                tok->varId(it->second.var->declarationId());
                tok->variable(it->second.var);
            }
        }

        /**
         * Build a vector of size "highest assigned id + 1" where each
         * registered variable is stored at the index of its declaration id.
         * Unused slots (including index 0) are nullptr.
         */
        std::vector<const Variable *> getVariableList() const {
            std::vector<const Variable *> ret;
            ret.resize(mVarId + 1, nullptr);
            for (const auto &it: mDeclMap) {
                if (it.second.var)
                    ret[it.second.var->declarationId()] = it.second.var;
            }
            return ret;
        }

    private:
        std::map<std::string, Decl> mDeclMap;
        int mVarId = 0;
    };

    /** One node of the dumped AST: a node type plus the remaining tokens of its line. */
    class AstNode {
    public:
        AstNode(const std::string &nodeType, const std::string &ext, Data *data)
            : nodeType(nodeType), mExtTokens(splitString(ext)), mData(data)
        {}
        std::string nodeType;
        /** Child nodes; an entry may be nullptr (see parseClangAstDump). */
        std::vector<std::shared_ptr<AstNode>> children;

        void setLocations(TokenList *tokenList, int file, int line, int col);

        void dumpAst(int num = 0, int indent = 0) const;
        Token *createTokens(TokenList *tokenList);
    private:
        Token *addtoken(TokenList *tokenList, const std::string &str);
        Token *addTypeTokens(TokenList *tokenList, const std::string &str);
        Scope *createScope(TokenList *tokenList, Scope::ScopeType scopeType, AstNode *astNode);
        std::string getSpelling() const;
        std::string getType() const;

        int mFile  = 0;
        int mLine  = 1;
        int mCol   = 1;
        int mVarId = 0;
        std::vector<std::string> mExtTokens;
        Data *mData;
    };

    typedef std::shared_ptr<AstNode> AstNodePtr;
}

/**
 * Name of the declared entity. Only implemented for ParmVarDecl, where the
 * second-to-last ext token is returned; every other node type (and a
 * ParmVarDecl with fewer than two ext tokens) yields "".
 */
std::string clangastdump::AstNode::getSpelling() const
{
    if (nodeType == ParmVarDecl && mExtTokens.size() >= 2)
        return mExtTokens[mExtTokens.size() - 2];
    return "";
}

/**
 * Unquoted type string of this node, or "" when the node type is not
 * handled or the line has too few ext tokens.
 */
std::string clangastdump::AstNode::getType() const
{
    if (nodeType == DeclRefExpr) {
        if (!mExtTokens.empty())
            return unquote(mExtTokens.back());
        return "";
    }
    if (nodeType == BinaryOperator || nodeType == IntegerLiteral) {
        if (mExtTokens.size() >= 2)
            return unquote(mExtTokens[mExtTokens.size() - 2]);
        return "";
    }
    return "";
}

/**
 * Print this node and, recursively, its children to std::cout.
 * Each level is indented two more spaces. The num parameter is unused.
 */
void clangastdump::AstNode::dumpAst(int num, int indent) const
{
    (void)num;
    std::cout << std::string(indent, ' ') << nodeType;
    for (const std::string &tok: mExtTokens)
        std::cout << " " << tok;
    std::cout << std::endl;
    for (std::size_t c = 0; c < children.size(); ++c) {
        if (children[c])
            children[c]->dumpAst(static_cast<int>(c), indent + 2);
        else
            // Null children are stored by the parser; show them instead of dereferencing
            std::cout << std::string(indent + 2, ' ') << "<<<NULL>>>" << std::endl;
    }
}

/**
 * Compute file/line/column of this node and all its descendants.
 *
 * The incoming file/line/col are the defaults; ext tokens of the form
 * "<col:N", "<line:N" or "<name:..." override column, line or file
 * respectively. The resulting values are stored in this node and passed
 * on as defaults to each child.
 */
void clangastdump::AstNode::setLocations(TokenList *tokenList, int file, int line, int col)
{
    for (const std::string &ext: mExtTokens) {
        if (ext.compare(0,5,"<col:") == 0)
            col = std::atoi(ext.substr(5).c_str());
        else if (ext.compare(0,6,"<line:") == 0)
            line = std::atoi(ext.substr(6).c_str());
        else if (!ext.empty() && ext[0] == '<' && ext.find(":") != std::string::npos)
            file = tokenList->appendFileIfNew(ext.substr(1,ext.find(":") - 1));
    }
    mFile = file;
    mLine = line;
    mCol = col;
    for (const auto &child: children) {
        if (child)
            child->setLocations(tokenList, file, line, col);
    }
}

/**
 * Append a token with text str at this node's location and return it.
 *
 * The token's scope is derived from the previous last token: the first
 * scope of the symbol database when the list is empty, the enclosing scope
 * when the previous token is "}", otherwise the previous token's scope.
 * If getType() is "int" the token also gets a signed int value type.
 */
Token *clangastdump::AstNode::addtoken(TokenList *tokenList, const std::string &str)
{
    const Scope *scope;
    if (!tokenList->back())
        scope = &mData->mSymbolDatabase->scopeList.front();
    else if (tokenList->back()->str() == "}")
        scope = tokenList->back()->scope()->nestedIn;
    else
        scope = tokenList->back()->scope();
    tokenList->addtoken(str, mLine, mFile);
    tokenList->back()->column(mCol);
    tokenList->back()->scope(scope);
    if (getType() == "int")
        // NOTE(review): presumably the token takes ownership of the ValueType - confirm
        tokenList->back()->setValueType(new ValueType(ValueType::Sign::SIGNED, ValueType::Type::INT, 0));
    return tokenList->back();
}

/**
 * Append a single token for a quoted type string.
 * If str contains " (", only the text between the leading quote and " ("
 * is used; otherwise the unquoted str is used.
 */
Token *clangastdump::AstNode::addTypeTokens(TokenList *tokenList, const std::string &str)
{
    if (str.find(" (") == std::string::npos)
        return addtoken(tokenList, unquote(str));
    return addtoken(tokenList, str.substr(1,str.find(" (")-1));
}

/**
 * Create a new scope of type scopeType, emit "{", the tokens of astNode,
 * a ";" if the body does not already end with one, and "}".
 * Returns the new scope with bodyStart/bodyEnd set.
 */
Scope *clangastdump::AstNode::createScope(TokenList *tokenList, Scope::ScopeType scopeType, AstNode *astNode)
{
    SymbolDatabase *symbolDatabase = mData->mSymbolDatabase;

    const Scope *nestedIn;
    if (!tokenList->back())
        nestedIn = &symbolDatabase->scopeList.front();
    else if (tokenList->back()->str() == "}")
        nestedIn = tokenList->back()->link()->previous()->scope();
    else
        nestedIn = tokenList->back()->scope();

    symbolDatabase->scopeList.push_back(Scope(nullptr, nullptr, nestedIn));
    Scope *scope = &symbolDatabase->scopeList.back();
    scope->type = scopeType;
    Token *bodyStart = addtoken(tokenList, "{");
    tokenList->back()->scope(scope);
    astNode->createTokens(tokenList);
    if (tokenList->back()->str() != ";")
        addtoken(tokenList, ";");
    Token *bodyEnd = addtoken(tokenList, "}");
    // Link both directions: the "}" branch above follows the link of a closing brace
    bodyStart->link(bodyEnd);
    bodyEnd->link(bodyStart);
    scope->bodyStart = bodyStart;
    scope->bodyEnd = bodyEnd;
    return scope;
}

/**
 * Append the tokens for this node (and its children) to tokenList and
 * build the corresponding AST operand links.
 *
 * Returns the token that represents the expression (e.g. the operator of a
 * binary expression), or nullptr for statements/declarations. Node types
 * that are not handled, and an IfStmt without usable condition/body
 * children, produce a single placeholder token "?<nodeType>?".
 *
 * Expression branches assume the expected operand children are present
 * (e.g. two for BinaryOperator) - they are not validated here.
 */
Token *clangastdump::AstNode::createTokens(TokenList *tokenList)
{
    if (nodeType == BinaryOperator) {
        Token *tok1 = children[0]->createTokens(tokenList);
        Token *binop = addtoken(tokenList, unquote(mExtTokens.back()));
        Token *tok2 = children[1]->createTokens(tokenList);
        binop->astOperand1(tok1);
        binop->astOperand2(tok2);
        return binop;
    }
    if (nodeType == CallExpr) {
        Token *f = children[0]->createTokens(tokenList);
        Token *par1 = addtoken(tokenList, "(");
        par1->astOperand1(f);
        // Arguments are chained through "," tokens hanging off operand2
        Token *parent = par1;
        for (std::size_t c = 1; c < children.size(); ++c) {
            if (c + 1 < children.size()) {
                Token *child = children[c]->createTokens(tokenList);
                Token *comma = addtoken(tokenList, ",");
                comma->astOperand1(child);
                parent->astOperand2(comma);
                parent = comma;
            } else {
                parent->astOperand2(children[c]->createTokens(tokenList));
            }
        }
        Token *par2 = addtoken(tokenList, ")");
        par1->link(par2);
        par2->link(par1);
        return par1;
    }
    if (nodeType == CompoundStmt) {
        for (const AstNodePtr &child: children) {
            if (!child)
                continue;
            child->createTokens(tokenList);
            // The ";" is added through the child so it gets the child's location
            child->addtoken(tokenList, ";");
        }
        return nullptr;
    }
    if (nodeType == DeclRefExpr) {
        const std::string addr = mExtTokens[mExtTokens.size() - 3];
        Token *reftok = addtoken(tokenList, unquote(mExtTokens[mExtTokens.size() - 2]));
        mData->ref(addr, reftok);
        return reftok;
    }
    if (nodeType == FunctionDecl) {
        SymbolDatabase *symbolDatabase = mData->mSymbolDatabase;
        addTypeTokens(tokenList, mExtTokens.back());
        Token *nameToken = addtoken(tokenList, mExtTokens[mExtTokens.size() - 2]);
        Scope &globalScope = symbolDatabase->scopeList.front();
        symbolDatabase->scopeList.push_back(Scope(nullptr, nullptr, &globalScope));
        Scope &scope = symbolDatabase->scopeList.back();
        symbolDatabase->functionScopes.push_back(&scope);
        globalScope.functionList.push_back(Function(nameToken));
        scope.function = &globalScope.functionList.back();
        scope.type = Scope::ScopeType::eFunction;
        scope.className = nameToken->str();
        Token *par1 = addtoken(tokenList, "(");
        // Function arguments
        for (const AstNodePtr &child: children) {
            if (!child || child->nodeType != ParmVarDecl)
                continue;
            if (tokenList->back() != par1)
                addtoken(tokenList, ",");
            addTypeTokens(tokenList, child->mExtTokens.back());
            const std::string spelling = child->getSpelling();
            if (!spelling.empty()) {
                const std::string addr = child->mExtTokens[0];
                Token *vartok = addtoken(tokenList, spelling);
                scope.function->argumentList.push_back(Variable(vartok, nullptr, nullptr, 0, AccessControl::Argument, nullptr, &scope, nullptr));
                mData->varDecl(addr, vartok, &scope.function->argumentList.back());
            }
        }
        Token *par2 = addtoken(tokenList, ")");
        par1->link(par2);
        par2->link(par1);
        // Function body
        if (!children.empty() && children.back() && children.back()->nodeType == CompoundStmt) {
            Token *bodyStart = addtoken(tokenList, "{");
            bodyStart->scope(&scope);
            children.back()->createTokens(tokenList);
            Token *bodyEnd = addtoken(tokenList, "}");
            scope.bodyStart = bodyStart;
            scope.bodyEnd = bodyEnd;
            bodyStart->link(bodyEnd);
            bodyEnd->link(bodyStart);
        } else {
            addtoken(tokenList, ";");
        }
        return nullptr;
    }
    if (nodeType == IfStmt) {
        // children[2] is used as the condition and children[3] as the body.
        // Children may be missing or null, so validate before dereferencing.
        if (children.size() < 4 || !children[2] || !children[3])
            return addtoken(tokenList, "?" + nodeType + "?");
        AstNode *cond = children[2].get();
        AstNode *then = children[3].get();
        Token *iftok = addtoken(tokenList, "if");
        Token *par1 = addtoken(tokenList, "(");
        par1->astOperand1(iftok);
        par1->astOperand2(cond->createTokens(tokenList));
        Token *par2 = addtoken(tokenList, ")");
        par1->link(par2);
        par2->link(par1);
        createScope(tokenList, Scope::ScopeType::eIf, then);
        return nullptr;
    }
    if (nodeType == ImplicitCastExpr)
        return children[0]->createTokens(tokenList);
    if (nodeType == IntegerLiteral)
        return addtoken(tokenList, mExtTokens.back());
    if (nodeType == ReturnStmt) {
        Token *tok1 = addtoken(tokenList, "return");
        if (!children.empty() && children[0])
            tok1->astOperand1(children[0]->createTokens(tokenList));
        return tok1;
    }
    if (nodeType == UnaryOperator) {
        Token *unop = addtoken(tokenList, unquote(mExtTokens.back()));
        unop->astOperand1(children[0]->createTokens(tokenList));
        return unop;
    }
    if (nodeType == VarDecl) {
        // With an initializer the line ends "... name 'type' cinit",
        // without one it ends "... name 'type'".
        const bool isInit = mExtTokens.back() == "cinit";
        const std::string addr = mExtTokens.front();
        const std::string type = isInit ? mExtTokens[mExtTokens.size() - 2] : mExtTokens.back();
        const std::string name = isInit ? mExtTokens[mExtTokens.size() - 3] : mExtTokens[mExtTokens.size() - 2];
        addTypeTokens(tokenList, type);
        Token *vartok1 = addtoken(tokenList, name);
        Scope *scope = const_cast<Scope *>(tokenList->back()->scope());
        const AccessControl accessControl = (scope->type == Scope::ScopeType::eGlobal) ? (AccessControl::Global) : (AccessControl::Local);
        scope->varlist.push_back(Variable(vartok1, nullptr, nullptr, 0, accessControl, nullptr, scope, nullptr));
        mData->varDecl(addr, vartok1, &scope->varlist.back());
        addtoken(tokenList, ";");
        if (isInit) {
            // The initialization is emitted as a separate "name = expr" assignment
            Token *vartok2 = addtoken(tokenList, name);
            mData->ref(addr, vartok2);
            Token *eq = addtoken(tokenList, "=");
            eq->astOperand1(vartok2);
            eq->astOperand2(children.back()->createTokens(tokenList));
        }
        return nullptr;
    }
    return addtoken(tokenList, "?" + nodeType + "?");
}

/**
 * Read a clang AST dump from f and populate the tokenizer's token list and
 * symbol database from it.
 *
 * Each line is located by its first '-'; the nesting level is derived from
 * the column of that '-' (two columns per level). A FunctionDecl or VarDecl
 * at column 1 starts a new top-level tree; the previous tree, if any, is
 * converted to tokens first. Lines whose text from the '-' on is exactly
 * "-<<<NULL>>>" add a nullptr child to their parent. Lines that do not fit
 * into the current tree (no parent at that level) are ignored.
 */
void clangastdump::parseClangAstDump(Tokenizer *tokenizer, std::istream &f)
{
    TokenList *tokenList = &tokenizer->list;

    tokenizer->createSymbolDatabase();
    SymbolDatabase *symbolDatabase = const_cast<SymbolDatabase *>(tokenizer->getSymbolDatabase());
    symbolDatabase->scopeList.push_back(Scope(nullptr, nullptr, nullptr));
    symbolDatabase->scopeList.back().type = Scope::ScopeType::eGlobal;

    clangastdump::Data data;
    data.mSymbolDatabase = symbolDatabase;
    std::string line;
    // tree[i] is the most recently seen node at nesting level i
    std::vector<AstNodePtr> tree;

    // Convert the currently collected top-level declaration into tokens
    const auto flushTree = [&tree, tokenList]() {
        if (tree.empty())
            return;
        tree[0]->setLocations(tokenList, 0, 1, 1);
        tree[0]->createTokens(tokenList);
    };

    while (std::getline(f,line)) {
        const std::string::size_type pos1 = line.find("-");
        if (pos1 == std::string::npos)
            continue;

        // A '-' in column 0 cannot belong to a nested node; treat it as level 0
        const std::size_t level = (pos1 == 0) ? 0 : (pos1 - 1) / 2;

        if (line.substr(pos1) == "-<<<NULL>>>") {
            // Only attach the placeholder when a parent exists at level-1
            if (level >= 1 && level <= tree.size())
                tree[level - 1]->children.push_back(nullptr);
            continue;
        }

        const std::string::size_type pos2 = line.find(" ", pos1);
        if (pos2 < pos1 + 4 || pos2 == std::string::npos)
            continue;
        const std::string nodeType = line.substr(pos1+1, pos2 - pos1 - 1);
        const std::string ext = line.substr(pos2);

        if (pos1 == 1 && (nodeType == FunctionDecl || nodeType == VarDecl)) {
            flushTree();
            tree.clear();
            tree.push_back(std::make_shared<AstNode>(nodeType, ext, &data));
            continue;
        }

        // Skip nodes outside a tree and nodes without a parent at level-1
        if (level == 0 || level > tree.size())
            continue;

        AstNodePtr newNode = std::make_shared<AstNode>(nodeType, ext, &data);
        tree[level - 1]->children.push_back(newNode);
        if (level == tree.size())
            tree.push_back(newNode);
        else
            tree[level] = newNode;
    }

    flushTree();

    symbolDatabase->clangSetVariables(data.getVariableList());
    tokenList->clangSetOrigFiles();
}