/* * simplecpp - A simple and high-fidelity C/C++ preprocessor library * Copyright (C) 2016 Daniel Marjamäki. * * This library is free software: you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either * version 3 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ #include "simplecpp.h" #include #include #include #include #include #include #include #include #include #include // strtoll, etc #include #include #include #include namespace { const simplecpp::TokenString DEFINE("define"); const simplecpp::TokenString UNDEF("undef"); const simplecpp::TokenString INCLUDE("include"); const simplecpp::TokenString ERROR("error"); const simplecpp::TokenString WARNING("warning"); const simplecpp::TokenString IF("if"); const simplecpp::TokenString IFDEF("ifdef"); const simplecpp::TokenString IFNDEF("ifndef"); const simplecpp::TokenString DEFINED("defined"); const simplecpp::TokenString ELSE("else"); const simplecpp::TokenString ELIF("elif"); const simplecpp::TokenString ENDIF("endif"); const simplecpp::TokenString PRAGMA("pragma"); const simplecpp::TokenString ONCE("once"); template std::string toString(T t) { std::ostringstream ostr; ostr << t; return ostr.str(); } long long stringToLL(const std::string &s) { long long ret; bool hex = (s.compare(0, 2, "0x") == 0); std::istringstream istr(hex ? s.substr(2) : s); if (hex) istr >> std::hex; istr >> ret; return ret; } unsigned long long stringToULL(const std::string &s) { unsigned long long ret; bool hex = (s.compare(0, 2, "0x") == 0); std::istringstream istr(hex ? s.substr(2) : s); if (hex) istr >> std::hex; istr >> ret; return ret; } bool sameline(const simplecpp::Token *tok1, const simplecpp::Token *tok2) { return tok1 && tok2 && tok1->location.sameline(tok2->location); } } void simplecpp::Location::adjust(const std::string &str) { if (str.find_first_of("\r\n") == std::string::npos) { col += str.size() - 1U; return; } for (unsigned int i = 0U; i < str.size(); ++i) { col++; if (str[i] == '\n' || str[i] == '\r') { col = 0; line++; if (str[i] == '\r' && (i+1) &filenames) : first(NULL), last(NULL), files(filenames) {} simplecpp::TokenList::TokenList(std::istream &istr, std::vector &filenames, const std::string &filename, OutputList *outputList) : first(NULL), last(NULL), files(filenames) { readfile(istr,filename,outputList); } simplecpp::TokenList::TokenList(const TokenList &other) : first(NULL), last(NULL), files(other.files) { *this = other; } simplecpp::TokenList::~TokenList() { clear(); } void simplecpp::TokenList::operator=(const TokenList &other) { if (this == &other) return; clear(); for (const Token *tok = other.cbegin(); tok; tok = tok->next) push_back(new Token(*tok)); sizeOfType = other.sizeOfType; } void simplecpp::TokenList::clear() { while (first) { Token *next = first->next; delete first; first = next; } last = NULL; sizeOfType.clear(); } void simplecpp::TokenList::push_back(Token *tok) { if (!first) first = tok; else last->next = tok; tok->previous = last; last = tok; } void simplecpp::TokenList::dump() const { std::cout << stringify() << std::endl; } std::string simplecpp::TokenList::stringify() const { std::ostringstream ret; Location loc(files); for (const Token *tok = cbegin(); tok; tok = tok->next) { while (tok->location.line > loc.line) { ret << '\n'; loc.line++; } if (sameline(tok->previous, tok)) ret << ' '; ret << tok->str; loc.adjust(tok->str); } return ret.str(); } static unsigned char readChar(std::istream &istr, unsigned int bom) { unsigned char ch = (unsigned char)istr.get(); // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the // character is non-ASCII character then replace it with 0xff if (bom == 0xfeff || bom == 0xfffe) { const unsigned char ch2 = (unsigned char)istr.get(); const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); ch = (unsigned char)((ch16 >= 0x80) ? 0xff : ch16); } // Handling of newlines.. if (ch == '\r') { ch = '\n'; if (bom == 0 && (char)istr.peek() == '\n') (void)istr.get(); else if (bom == 0xfeff || bom == 0xfffe) { int c1 = istr.get(); int c2 = istr.get(); int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); if (ch16 != '\n') { istr.unget(); istr.unget(); } } } return ch; } static unsigned char peekChar(std::istream &istr, unsigned int bom) { unsigned char ch = (unsigned char)istr.peek(); // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the // character is non-ASCII character then replace it with 0xff if (bom == 0xfeff || bom == 0xfffe) { const unsigned char ch2 = (unsigned char)istr.peek(); const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); ch = (unsigned char)((ch16 >= 0x80) ? 0xff : ch16); } // Handling of newlines.. if (ch == '\r') { ch = '\n'; if (bom != 0) (void)istr.peek(); } return ch; } static unsigned short getAndSkipBOM(std::istream &istr) { const unsigned char ch1 = istr.peek(); // The UTF-16 BOM is 0xfffe or 0xfeff. if (ch1 >= 0xfe) { unsigned short bom = ((unsigned char)istr.get() << 8); if (istr.peek() >= 0xfe) return bom | (unsigned char)istr.get(); return 0; } if (ch1 == 0xef && istr.peek() == 0xbb && istr.peek() == 0xbf) { // Skip BOM 0xefbbbf (void)istr.get(); (void)istr.get(); (void)istr.get(); } return 0; } void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filename, OutputList *outputList) { std::stack loc; unsigned int multiline = 0U; const Token *oldLastToken = NULL; const unsigned short bom = getAndSkipBOM(istr); Location location(files); location.fileIndex = fileIndex(filename); location.line = 1U; location.col = 0U; while (istr.good()) { unsigned char ch = readChar(istr,bom); if (!istr.good()) break; location.col++; if (ch == '\n') { if (cend() && cend()->op == '\\') { ++multiline; deleteToken(end()); } else { location.line += multiline + 1; multiline = 0U; } location.col = 0; if (oldLastToken != cend()) { oldLastToken = cend(); const std::string lastline(lastLine()); if (lastline == "# file %str%") { loc.push(location); location.fileIndex = fileIndex(cend()->str.substr(1U, cend()->str.size() - 2U)); location.line = 1U; } // #endfile else if (lastline == "# endfile" && !loc.empty()) { location = loc.top(); loc.pop(); } } continue; } if (std::isspace(ch)) continue; TokenString currentToken; // number or name if (std::isalnum(ch) || ch == '_') { while (istr.good() && (std::isalnum(ch) || ch == '_')) { currentToken += ch; ch = readChar(istr,bom); } istr.unget(); } // comment else if (ch == '/' && peekChar(istr,bom) == '/') { while (istr.good() && ch != '\r' && ch != '\n') { currentToken += ch; ch = readChar(istr, bom); } if (currentToken[currentToken.size() - 1U] == '\\') { multiline = 1; currentToken = currentToken.erase(currentToken.size() - 1U); } else { istr.unget(); } } // comment else if (ch == '/' && peekChar(istr,bom) == '*') { currentToken = "/*"; (void)readChar(istr,bom); ch = readChar(istr,bom); while (istr.good()) { currentToken += ch; if (currentToken.size() >= 4U && currentToken.substr(currentToken.size() - 2U) == "*/") break; ch = readChar(istr,bom); } } // string / char literal else if (ch == '\"' || ch == '\'') { currentToken = readUntil(istr,location,ch,ch,outputList); if (currentToken.size() < 2U) return; } else { currentToken += ch; } if (currentToken == "<" && lastLine() == "# include") { currentToken = readUntil(istr, location, '<', '>', outputList); if (currentToken.size() < 2U) return; } push_back(new Token(currentToken, location)); location.adjust(currentToken); } combineOperators(); } void simplecpp::TokenList::constFold() { while (begin()) { // goto last '(' Token *tok = end(); while (tok && tok->op != '(') tok = tok->previous; // no '(', goto first token if (!tok) tok = begin(); // Constant fold expression constFoldUnaryNotPosNeg(tok); constFoldMulDivRem(tok); constFoldAddSub(tok); constFoldComparison(tok); constFoldBitwise(tok); constFoldLogicalOp(tok); constFoldQuestionOp(&tok); // If there is no '(' we are done with the constant folding if (tok->op != '(') break; if (!tok->next || !tok->next->next || tok->next->next->op != ')') break; tok = tok->next; deleteToken(tok->previous); deleteToken(tok->next); } } void simplecpp::TokenList::combineOperators() { for (Token *tok = begin(); tok; tok = tok->next) { if (tok->op == '.') { // float literals.. if (tok->previous && tok->previous->number) { tok->setstr(tok->previous->str + '.'); deleteToken(tok->previous); if (tok->next && tok->next->startsWithOneOf("Ee")) { tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); } } if (tok->next && tok->next->number) { tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); } } // match: [0-9.]+E [+-] [0-9]+ const char lastChar = tok->str[tok->str.size() - 1]; if (tok->number && (lastChar == 'E' || lastChar == 'e') && tok->next && tok->next->isOneOf("+-") && tok->next->next && tok->next->next->number) { tok->setstr(tok->str + tok->next->op + tok->next->next->str); deleteToken(tok->next); deleteToken(tok->next); } if (tok->op == '\0' || !tok->next || tok->next->op == '\0') continue; if (tok->next->op == '=' && tok->isOneOf("=!<>+-*/%&|^")) { tok->setstr(tok->str + "="); deleteToken(tok->next); } else if ((tok->op == '|' || tok->op == '&') && tok->op == tok->next->op) { tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); } else if (tok->op == ':' && tok->next->op == ':') { tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); } else if (tok->op == '-' && tok->next->op == '>') { tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); } else if ((tok->op == '<' || tok->op == '>') && tok->op == tok->next->op) { tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); if (tok->next && tok->next->op == '=') { tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); } } else if ((tok->op == '+' || tok->op == '-') && tok->op == tok->next->op) { if (tok->location.col + 1U != tok->next->location.col) continue; if (tok->previous && tok->previous->number) continue; if (tok->next->next && tok->next->next->number) continue; tok->setstr(tok->str + tok->next->str); deleteToken(tok->next); } } } void simplecpp::TokenList::constFoldUnaryNotPosNeg(simplecpp::Token *tok) { for (; tok && tok->op != ')'; tok = tok->next) { if (tok->op == '!' && tok->next && tok->next->number) { tok->setstr(tok->next->str == "0" ? "1" : "0"); deleteToken(tok->next); } else { if (tok->previous && (tok->previous->number || tok->previous->name)) continue; if (!tok->next || !tok->next->number) continue; switch (tok->op) { case '+': tok->setstr(tok->next->str); deleteToken(tok->next); break; case '-': tok->setstr(tok->op + tok->next->str); deleteToken(tok->next); break; } } } } void simplecpp::TokenList::constFoldMulDivRem(Token *tok) { for (; tok && tok->op != ')'; tok = tok->next) { if (!tok->previous || !tok->previous->number) continue; if (!tok->next || !tok->next->number) continue; long long result; if (tok->op == '*') result = (stringToLL(tok->previous->str) * stringToLL(tok->next->str)); else if (tok->op == '/' || tok->op == '%') { long long rhs = stringToLL(tok->next->str); if (rhs == 0) throw std::overflow_error("division/modulo by zero"); long long lhs = stringToLL(tok->previous->str); if (rhs == -1 && lhs == std::numeric_limits::min()) throw std::overflow_error("division overflow"); if (tok->op == '/') result = (lhs / rhs); else result = (lhs % rhs); } else continue; tok = tok->previous; tok->setstr(toString(result)); deleteToken(tok->next); deleteToken(tok->next); } } void simplecpp::TokenList::constFoldAddSub(Token *tok) { for (; tok && tok->op != ')'; tok = tok->next) { if (!tok->previous || !tok->previous->number) continue; if (!tok->next || !tok->next->number) continue; long long result; if (tok->op == '+') result = stringToLL(tok->previous->str) + stringToLL(tok->next->str); else if (tok->op == '-') result = stringToLL(tok->previous->str) - stringToLL(tok->next->str); else continue; tok = tok->previous; tok->setstr(toString(result)); deleteToken(tok->next); deleteToken(tok->next); } } void simplecpp::TokenList::constFoldComparison(Token *tok) { for (; tok && tok->op != ')'; tok = tok->next) { if (!tok->startsWithOneOf("<>=!")) continue; if (!tok->previous || !tok->previous->number) continue; if (!tok->next || !tok->next->number) continue; int result; if (tok->str == "==") result = (stringToLL(tok->previous->str) == stringToLL(tok->next->str)); else if (tok->str == "!=") result = (stringToLL(tok->previous->str) != stringToLL(tok->next->str)); else if (tok->str == ">") result = (stringToLL(tok->previous->str) > stringToLL(tok->next->str)); else if (tok->str == ">=") result = (stringToLL(tok->previous->str) >= stringToLL(tok->next->str)); else if (tok->str == "<") result = (stringToLL(tok->previous->str) < stringToLL(tok->next->str)); else if (tok->str == "<=") result = (stringToLL(tok->previous->str) <= stringToLL(tok->next->str)); else continue; tok = tok->previous; tok->setstr(toString(result)); deleteToken(tok->next); deleteToken(tok->next); } } void simplecpp::TokenList::constFoldBitwise(Token *tok) { Token * const tok1 = tok; for (const char *op = "&^|"; *op; op++) { std::string altop; if (*op == '&') altop = "bitand"; else if (*op == '|') altop = "bitor"; else altop = "xor"; for (tok = tok1; tok && tok->op != ')'; tok = tok->next) { if (tok->op != *op && tok->str != altop) continue; if (!tok->previous || !tok->previous->number) continue; if (!tok->next || !tok->next->number) continue; long long result; if (*op == '&') result = (stringToLL(tok->previous->str) & stringToLL(tok->next->str)); else if (*op == '^') result = (stringToLL(tok->previous->str) ^ stringToLL(tok->next->str)); else /*if (*op == '|')*/ result = (stringToLL(tok->previous->str) | stringToLL(tok->next->str)); tok = tok->previous; tok->setstr(toString(result)); deleteToken(tok->next); deleteToken(tok->next); } } } void simplecpp::TokenList::constFoldLogicalOp(Token *tok) { for (; tok && tok->op != ')'; tok = tok->next) { if (tok->str != "&&" && tok->str != "||" && tok->str != "and" && tok->str != "or") continue; if (!tok->previous || !tok->previous->number) continue; if (!tok->next || !tok->next->number) continue; int result; if (tok->str == "||" || tok->str == "or") result = (stringToLL(tok->previous->str) || stringToLL(tok->next->str)); else /*if (tok->str == "&&")*/ result = (stringToLL(tok->previous->str) && stringToLL(tok->next->str)); tok = tok->previous; tok->setstr(toString(result)); deleteToken(tok->next); deleteToken(tok->next); } } void simplecpp::TokenList::constFoldQuestionOp(Token **tok1) { bool gotoTok1 = false; for (Token *tok = *tok1; tok && tok->op != ')'; tok = gotoTok1 ? *tok1 : tok->next) { gotoTok1 = false; if (tok->str != "?") continue; if (!tok->previous || !tok->previous->number) continue; if (!tok->next) continue; if (!tok->next->next || tok->next->next->op != ':') continue; Token * const condTok = tok->previous; Token * const trueTok = tok->next; Token * const falseTok = trueTok->next->next; if (condTok == *tok1) *tok1 = (condTok->str != "0" ? trueTok : falseTok); deleteToken(condTok->next); // ? deleteToken(trueTok->next); // : deleteToken(condTok->str == "0" ? trueTok : falseTok); deleteToken(condTok); gotoTok1 = true; } } void simplecpp::TokenList::removeComments() { Token *tok = first; while (tok) { Token *tok1 = tok; tok = tok->next; if (tok1->comment) deleteToken(tok1); } } std::string simplecpp::TokenList::readUntil(std::istream &istr, const Location &location, const char start, const char end, OutputList *outputList) { std::string ret; ret += start; char ch = 0; while (ch != end && ch != '\r' && ch != '\n' && istr.good()) { ch = (unsigned char)istr.get(); ret += ch; if (ch == '\\') ret += (unsigned char)istr.get(); } if (!istr.good() || ch != end) { clear(); if (outputList) { Output err(files); err.type = Output::ERROR; err.location = location; err.msg = std::string("No pair for character (") + start + "). Can't process file. File is either invalid or unicode, which is currently not supported."; outputList->push_back(err); } return ""; } return ret; } std::string simplecpp::TokenList::lastLine() const { std::string ret; for (const Token *tok = cend(); sameline(tok,cend()); tok = tok->previous) { if (tok->comment) continue; if (!ret.empty()) ret = ' ' + ret; ret = (tok->str[0] == '\"' ? std::string("%str%") : tok->str) + ret; } return ret; } unsigned int simplecpp::TokenList::fileIndex(const std::string &filename) { for (unsigned int i = 0; i < files.size(); ++i) { if (files[i] == filename) return i; } files.push_back(filename); return files.size() - 1U; } namespace simplecpp { class Macro { public: Macro(std::vector &f) : nameToken(NULL), files(f), tokenListDefine(f) {} explicit Macro(const Token *tok, std::vector &f) : nameToken(NULL), files(f), tokenListDefine(f) { if (sameline(tok->previous, tok)) throw std::runtime_error("bad macro syntax"); if (tok->op != '#') throw std::runtime_error("bad macro syntax"); tok = tok->next; if (!tok || tok->str != DEFINE) throw std::runtime_error("bad macro syntax"); tok = tok->next; if (!tok || !tok->name) throw std::runtime_error("bad macro syntax"); parseDefine(tok); } explicit Macro(const std::string &name, const std::string &value, std::vector &f) : nameToken(NULL), files(f), tokenListDefine(f) { const std::string def(name + ' ' + value); std::istringstream istr(def); tokenListDefine.readfile(istr); parseDefine(tokenListDefine.cbegin()); } Macro(const Macro ¯o) : nameToken(NULL), files(macro.files), tokenListDefine(macro.files) { *this = macro; } void operator=(const Macro ¯o) { if (this != ¯o) { if (macro.tokenListDefine.empty()) parseDefine(macro.nameToken); else { tokenListDefine = macro.tokenListDefine; parseDefine(tokenListDefine.cbegin()); } } } const Token * expand(TokenList * const output, const Token * rawtok, const std::map ¯os, std::vector &files) const { std::set expandedmacros; TokenList output2(files); rawtok = expand(&output2, rawtok->location, rawtok, macros, expandedmacros); while (output2.cend() && rawtok) { unsigned int par = 0; Token* macro2tok = output2.end(); while (macro2tok) { if (macro2tok->op == '(') { if (par==0) break; --par; } else if (macro2tok->op == ')') ++par; macro2tok = macro2tok->previous; } if (macro2tok) { // macro2tok->op == '(' macro2tok = macro2tok->previous; expandedmacros.insert(name()); } else if (rawtok->op == '(') macro2tok = output2.end(); if (!macro2tok || !macro2tok->name) break; if (output2.cbegin() != output2.cend() && macro2tok->str == this->name()) break; const std::map::const_iterator macro = macros.find(macro2tok->str); if (macro == macros.end() || !macro->second.functionLike()) break; TokenList rawtokens2(files); const Location loc(macro2tok->location); while (macro2tok) { Token *next = macro2tok->next; rawtokens2.push_back(new Token(macro2tok->str, loc)); output2.deleteToken(macro2tok); macro2tok = next; } par = (rawtokens2.cbegin() != rawtokens2.cend()) ? 1U : 0U; const Token *rawtok2 = rawtok; for (; rawtok2; rawtok2 = rawtok2->next) { rawtokens2.push_back(new Token(rawtok2->str, loc)); if (rawtok2->op == '(') ++par; else if (rawtok2->op == ')') { if (par <= 1U) break; --par; } } if (!rawtok2 || par != 1U) break; if (macro->second.expand(&output2, rawtok->location, rawtokens2.cbegin(), macros, expandedmacros) != NULL) break; rawtok = rawtok2->next; } output->takeTokens(output2); return rawtok; } const Token * expand(TokenList * const output, const Location &loc, const Token * const nameToken, const std::map ¯os, std::set expandedmacros) const { const std::set expandedmacros1(expandedmacros); expandedmacros.insert(nameToken->str); usageList.push_back(loc); const std::vector parametertokens(getMacroParameters(nameToken, !expandedmacros1.empty())); Token * const output_end_1 = output->end(); if (functionLike()) { // No arguments => not macro expansion if (nameToken->next && nameToken->next->op != '(') { output->push_back(new Token(nameToken->str, loc)); return nameToken->next; } // Parse macro-call if (variadic) { if (parametertokens.size() < args.size()) { throw wrongNumberOfParameters(nameToken->location, name()); } } else { if (parametertokens.size() != args.size() + (args.empty() ? 2U : 1U)) throw wrongNumberOfParameters(nameToken->location, name()); } } // expand for (const Token *tok = valueToken; tok != endToken;) { if (tok->op != '#') { // A##B => AB if (tok->next && tok->next->op == '#' && tok->next->next && tok->next->next->op == '#') { output->push_back(newMacroToken(expandArgStr(tok, parametertokens), loc, !expandedmacros1.empty())); tok = tok->next; } else { tok = expandToken(output, loc, tok, macros, expandedmacros1, expandedmacros, parametertokens); } continue; } tok = tok->next; if (tok == endToken) { output->push_back(new Token(*tok->previous)); break; } if (tok->op == '#') { // A##B => AB Token *A = output->end(); if (!A) throw invalidHashHash(tok->location, name()); if (!sameline(tok, tok->next)) throw invalidHashHash(tok->location, name()); const std::string strAB = A->str + expandArgStr(tok->next, parametertokens); tok = tok->next->next; output->deleteToken(A); TokenList tokens(files); tokens.push_back(new Token(strAB, tok->location)); // TODO: For functionLike macros, push the (...) expandToken(output, loc, tokens.cbegin(), macros, expandedmacros1, expandedmacros, parametertokens); } else { // #123 => "123" TokenList tokenListHash(files); tok = expandToken(&tokenListHash, loc, tok, macros, expandedmacros1, expandedmacros, parametertokens); std::string s; for (const Token *hashtok = tokenListHash.cbegin(); hashtok; hashtok = hashtok->next) s += hashtok->str; output->push_back(newMacroToken('\"' + s + '\"', loc, expandedmacros1.empty())); } } if (!functionLike()) setMacroName(output, output_end_1, expandedmacros1); return functionLike() ? parametertokens.back()->next : nameToken->next; } const TokenString &name() const { return nameToken->str; } const Location &defineLocation() const { return nameToken->location; } const std::list &usage() const { return usageList; } bool functionLike() const { return nameToken->next && nameToken->next->op == '(' && sameline(nameToken, nameToken->next) && nameToken->next->location.col == nameToken->location.col + nameToken->str.size(); } struct Error { Error(const Location &loc, const std::string &s) : location(loc), what(s) {} Location location; std::string what; }; struct wrongNumberOfParameters : public Error { wrongNumberOfParameters(const Location &loc, const std::string ¯oName) : Error(loc, "Syntax error. Wrong number of parameters for macro \'" + macroName + "\'.") {} }; struct invalidHashHash : public Error { invalidHashHash(const Location &loc, const std::string ¯oName) : Error(loc, "Syntax error. Invalid ## usage when expanding \'" + macroName + "\'.") {} }; private: Token *newMacroToken(const TokenString &str, const Location &loc, bool rawCode) const { Token *tok = new Token(str,loc); if (!rawCode) tok->macro = nameToken->str; return tok; } void setMacroName(TokenList *output, Token *token1, const std::set &expandedmacros1) const { if (!expandedmacros1.empty()) return; for (Token *tok = token1 ? token1->next : output->begin(); tok; tok = tok->next) { if (!tok->macro.empty()) tok->macro = nameToken->str; } } void parseDefine(const Token *nametoken) { nameToken = nametoken; variadic = false; if (!nameToken) { valueToken = endToken = NULL; args.clear(); return; } // function like macro.. if (functionLike()) { args.clear(); const Token *argtok = nameToken->next->next; while (argtok && argtok->op != ')') { if (argtok->op == '.' && argtok->next && argtok->next->op == '.' && argtok->next->next && argtok->next->next->op == '.' && argtok->next->next->next && argtok->next->next->next->op == ')') { variadic = true; if (!argtok->previous->name) args.push_back("__VA_ARGS__"); argtok = argtok->next->next->next; // goto ')' break; } if (argtok->op != ',') args.push_back(argtok->str); argtok = argtok->next; } valueToken = argtok->next; } else { args.clear(); valueToken = nameToken->next; } if (!sameline(valueToken, nameToken)) valueToken = NULL; endToken = valueToken; while (sameline(endToken, nameToken)) endToken = endToken->next; } unsigned int getArgNum(const TokenString &str) const { unsigned int par = 0; while (par < args.size()) { if (str == args[par]) return par; par++; } return ~0U; } std::vector getMacroParameters(const Token *nameToken, bool def) const { if (!nameToken->next || nameToken->next->op != '(' || !functionLike()) return std::vector(); std::vector parametertokens; parametertokens.push_back(nameToken->next); unsigned int par = 0U; for (const Token *tok = nameToken->next->next; def ? sameline(tok,nameToken) : (tok != NULL); tok = tok->next) { if (tok->op == '(') ++par; else if (tok->op == ')') { if (par == 0U) { parametertokens.push_back(tok); break; } --par; } else if (par == 0U && tok->op == ',' && (!variadic || parametertokens.size() < args.size())) parametertokens.push_back(tok); } return parametertokens; } const Token *appendTokens(TokenList *tokens, const Token *lpar, const std::map ¯os, const std::set &expandedmacros1, const std::set &expandedmacros, const std::vector ¶metertokens) const { if (!lpar || lpar->op != '(') return NULL; unsigned int par = 0; const Token *tok = lpar; while (sameline(lpar, tok)) { if (!expandArg(tokens, tok, tok->location, macros, expandedmacros1, expandedmacros, parametertokens)) tokens->push_back(new Token(*tok)); if (tok->op == '(') ++par; else if (tok->op == ')') { --par; if (par == 0U) break; } tok = tok->next; } return sameline(lpar,tok) ? tok : NULL; } const Token *expandToken(TokenList *output, const Location &loc, const Token *tok, const std::map ¯os, std::set expandedmacros1, std::set expandedmacros, const std::vector ¶metertokens) const { // Not name.. if (!tok->name) { output->push_back(newMacroToken(tok->str, loc, false)); return tok->next; } // Macro parameter.. { TokenList temp(files); if (expandArg(&temp, tok, loc, macros, expandedmacros1, expandedmacros, parametertokens)) { if (!(temp.cend() && temp.cend()->name && tok->next && tok->next->op == '(')) { output->takeTokens(temp); return tok->next; } const std::map::const_iterator it = macros.find(temp.cend()->str); if (it == macros.end() || expandedmacros.find(temp.cend()->str) != expandedmacros.end()) { output->takeTokens(temp); return tok->next; } const Macro &calledMacro = it->second; if (!calledMacro.functionLike()) { output->takeTokens(temp); return tok->next; } TokenList temp2(files); temp2.push_back(new Token(temp.cend()->str, tok->location)); const Token *tok2 = appendTokens(&temp2, tok->next, macros, expandedmacros1, expandedmacros, parametertokens); if (!tok2) return tok->next; output->takeTokens(temp); output->deleteToken(output->end()); calledMacro.expand(output, loc, temp2.cbegin(), macros, expandedmacros); return tok2->next; } } // Macro.. const std::map::const_iterator it = macros.find(tok->str); if (it != macros.end() && expandedmacros.find(tok->str) == expandedmacros.end()) { const Macro &calledMacro = it->second; if (!calledMacro.functionLike()) return calledMacro.expand(output, loc, tok, macros, expandedmacros); if (!sameline(tok, tok->next) || tok->next->op != '(') { output->push_back(newMacroToken(tok->str, loc, false)); return tok->next; } TokenList tokens(files); tokens.push_back(new Token(*tok)); const Token *tok2 = appendTokens(&tokens, tok->next, macros, expandedmacros1, expandedmacros, parametertokens); if (!tok2) { output->push_back(newMacroToken(tok->str, loc, false)); return tok->next; } calledMacro.expand(output, loc, tokens.cbegin(), macros, expandedmacros); return tok2->next; } output->push_back(newMacroToken(tok->str, loc, false)); return tok->next; } bool expandArg(TokenList *output, const Token *tok, const std::vector ¶metertokens) const { if (!tok->name) return false; const unsigned int argnr = getArgNum(tok->str); if (argnr >= args.size()) return false; // empty variadic parameter if (variadic && argnr + 1U >= parametertokens.size()) return true; for (const Token *partok = parametertokens[argnr]->next; partok != parametertokens[argnr + 1U]; partok = partok->next) output->push_back(new Token(*partok)); return true; } bool expandArg(TokenList *output, const Token *tok, const Location &loc, const std::map ¯os, std::set expandedmacros1, std::set expandedmacros, const std::vector ¶metertokens) const { if (!tok->name) return false; const unsigned int argnr = getArgNum(tok->str); if (argnr >= args.size()) return false; if (variadic && argnr + 1U >= parametertokens.size()) // empty variadic parameter return true; for (const Token *partok = parametertokens[argnr]->next; partok != parametertokens[argnr + 1U];) { const std::map::const_iterator it = macros.find(partok->str); if (it != macros.end() && expandedmacros1.find(partok->str) == expandedmacros1.end()) partok = it->second.expand(output, loc, partok, macros, expandedmacros); else { output->push_back(newMacroToken(partok->str, loc, expandedmacros1.empty())); partok = partok->next; } } return true; } std::string expandArgStr(const Token *tok, const std::vector ¶metertokens) const { TokenList tokens(files); if (expandArg(&tokens, tok, parametertokens)) { std::string s; for (const Token *tok2 = tokens.cbegin(); tok2; tok2 = tok2->next) s += tok2->str; return s; } return tok->str; } void setMacro(Token *tok) const { while (tok) { if (!tok->macro.empty()) tok->macro = nameToken->str; tok = tok->next; } } const Token *nameToken; std::vector args; bool variadic; const Token *valueToken; const Token *endToken; std::vector &files; TokenList tokenListDefine; mutable std::list usageList; }; } namespace simplecpp { std::string simplifyPath(std::string path) { // replace backslash separators std::string::size_type pos = 0; while ((pos = path.find("\\",pos)) != std::string::npos) path[pos] = '/'; // "./" at the start if (path.size() > 3 && path.compare(0,2,"./") == 0 && path[2] != '/') path.erase(0,2); // remove "/./" pos = 0; while ((pos = path.find("/./",pos)) != std::string::npos) { path.erase(pos,2); } // remove "xyz/../" pos = 1U; while ((pos = path.find("/../", pos)) != std::string::npos) { const std::string::size_type pos1 = path.rfind("/", pos - 1U); if (pos1 == std::string::npos) pos++; else { path.erase(pos1,pos-pos1+3); pos = std::min((std::string::size_type)1, pos1); } } return path; } } namespace { void simplifySizeof(simplecpp::TokenList &expr, const std::map &sizeOfType) { for (simplecpp::Token *tok = expr.begin(); tok; tok = tok->next) { if (tok->str != "sizeof") continue; simplecpp::Token *tok1 = tok->next; simplecpp::Token *tok2 = tok1->next; if (tok1->op == '(') { tok1 = tok1->next; while (tok2->op != ')') tok2 = tok2->next; } std::string type; for (simplecpp::Token *typeToken = tok1; typeToken != tok2; typeToken = typeToken->next) { if ((typeToken->str == "unsigned" || typeToken->str == "signed") && typeToken->next->name) continue; if (typeToken->str == "*" && type.find("*") != std::string::npos) continue; if (!type.empty()) type += ' '; type += typeToken->str; } const std::map::const_iterator it = sizeOfType.find(type); if (it != sizeOfType.end()) tok->setstr(toString(it->second)); else continue; tok2 = tok2->next; while (tok->next != tok2) expr.deleteToken(tok->next); } } void simplifyName(simplecpp::TokenList &expr) { std::set altop; altop.insert("and"); altop.insert("or"); altop.insert("bitand"); altop.insert("bitor"); altop.insert("xor"); for (simplecpp::Token *tok = expr.begin(); tok; tok = tok->next) { if (tok->name) { if (altop.find(tok->str) != altop.end()) { bool alt = true; if (!tok->previous || !tok->next) alt = false; if (!(tok->previous->number || tok->previous->op == ')')) alt = false; if (alt) continue; } tok->setstr("0"); } } } void simplifyNumbers(simplecpp::TokenList &expr) { for (simplecpp::Token *tok = expr.begin(); tok; tok = tok->next) { if (tok->str.size() == 1U) continue; if (tok->str.compare(0,2,"0x") == 0) tok->setstr(toString(stringToULL(tok->str))); else if (tok->str[0] == '\'') tok->setstr(toString(tok->str[1] & 0xffU)); } } long long evaluate(simplecpp::TokenList &expr, const std::map &sizeOfType) { simplifySizeof(expr, sizeOfType); simplifyName(expr); simplifyNumbers(expr); expr.constFold(); // TODO: handle invalid expressions return expr.cbegin() && expr.cbegin() == expr.cend() && expr.cbegin()->number ? stringToLL(expr.cbegin()->str) : 0LL; } const simplecpp::Token *gotoNextLine(const simplecpp::Token *tok) { const unsigned int line = tok->location.line; const unsigned int file = tok->location.fileIndex; while (tok && tok->location.line == line && tok->location.fileIndex == file) tok = tok->next; return tok; } std::string openHeader(std::ifstream &f, const simplecpp::DUI &dui, const std::string &sourcefile, const std::string &header, bool systemheader) { if (!systemheader) { if (sourcefile.find_first_of("\\/") != std::string::npos) { const std::string s = sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header; f.open(s.c_str()); if (f.is_open()) return simplecpp::simplifyPath(s); } else { f.open(header.c_str()); if (f.is_open()) return simplecpp::simplifyPath(header); } } for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { std::string s = *it; if (!s.empty() && s[s.size()-1U]!='/' && s[s.size()-1U]!='\\') s += '/'; s += header; f.open(s.c_str()); if (f.is_open()) return simplecpp::simplifyPath(s); } return ""; } std::string getFileName(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { if (!systemheader) { if (sourcefile.find_first_of("\\/") != std::string::npos) { const std::string s = sourcefile.substr(0, sourcefile.find_last_of("\\/") + 1U) + header; if (filedata.find(s) != filedata.end()) return simplecpp::simplifyPath(s); } else { if (filedata.find(header) != filedata.end()) return simplecpp::simplifyPath(header); } } for (std::list::const_iterator it = dui.includePaths.begin(); it != dui.includePaths.end(); ++it) { std::string s = *it; if (!s.empty() && s[s.size()-1U]!='/' && s[s.size()-1U]!='\\') s += '/'; s += header; if (filedata.find(s) != filedata.end()) return simplecpp::simplifyPath(s); } return ""; } bool hasFile(const std::map &filedata, const std::string &sourcefile, const std::string &header, const simplecpp::DUI &dui, bool systemheader) { return !getFileName(filedata, sourcefile, header, dui, systemheader).empty(); } } std::map simplecpp::load(const simplecpp::TokenList &rawtokens, std::vector &fileNumbers, const struct simplecpp::DUI &dui, simplecpp::OutputList *outputList) { std::map ret; std::list filelist; for (const Token *rawtok = rawtokens.cbegin(); rawtok || !filelist.empty(); rawtok = rawtok->next) { if (rawtok == NULL) { rawtok = filelist.back(); filelist.pop_back(); } if (rawtok->op != '#' || sameline(rawtok->previousSkipComments(), rawtok)) continue; rawtok = rawtok->nextSkipComments(); if (!rawtok || rawtok->str != INCLUDE) continue; const std::string &sourcefile = rawtok->location.file(); const Token *htok = rawtok->nextSkipComments(); if (!sameline(rawtok, htok)) continue; bool systemheader = (htok->str[0] == '<'); const std::string header(htok->str.substr(1U, htok->str.size() - 2U)); if (hasFile(ret, sourcefile, header, dui, systemheader)) continue; std::ifstream f; const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); if (!f.is_open()) continue; ret[header2] = 0; TokenList *tokens = new TokenList(f, fileNumbers, header2, outputList); ret[header2] = tokens; if (tokens->cbegin()) filelist.push_back(tokens->cbegin()); } return ret; } void simplecpp::preprocess(simplecpp::TokenList &output, const simplecpp::TokenList &rawtokens, std::vector &files, const std::map &filedata, const struct simplecpp::DUI &dui, simplecpp::OutputList *outputList, std::list *macroUsage) { std::map sizeOfType(rawtokens.sizeOfType); sizeOfType.insert(std::pair(std::string("char"), sizeof(char))); sizeOfType.insert(std::pair(std::string("short"), sizeof(short))); sizeOfType.insert(std::pair(std::string("short int"), sizeof(short int))); sizeOfType.insert(std::pair(std::string("int"), sizeof(int))); sizeOfType.insert(std::pair(std::string("long int"), sizeof(long int))); sizeOfType.insert(std::pair(std::string("long"), sizeof(long))); sizeOfType.insert(std::pair(std::string("long long"), sizeof(long long))); sizeOfType.insert(std::pair(std::string("float"), sizeof(float))); sizeOfType.insert(std::pair(std::string("double"), sizeof(double))); sizeOfType.insert(std::pair(std::string("long double"), sizeof(long double))); sizeOfType.insert(std::pair(std::string("char *"), sizeof(char *))); sizeOfType.insert(std::pair(std::string("short *"), sizeof(short *))); sizeOfType.insert(std::pair(std::string("short int *"), sizeof(short int *))); sizeOfType.insert(std::pair(std::string("int *"), sizeof(int *))); sizeOfType.insert(std::pair(std::string("long int *"), sizeof(long int *))); sizeOfType.insert(std::pair(std::string("long *"), sizeof(long *))); sizeOfType.insert(std::pair(std::string("long long *"), sizeof(long long *))); sizeOfType.insert(std::pair(std::string("float *"), sizeof(float *))); sizeOfType.insert(std::pair(std::string("double *"), sizeof(double *))); sizeOfType.insert(std::pair(std::string("long double *"), sizeof(long double *))); std::map macros; for (std::list::const_iterator it = dui.defines.begin(); it != dui.defines.end(); ++it) { const std::string ¯ostr = *it; const std::string::size_type eq = macrostr.find("="); const std::string::size_type par = macrostr.find("("); const std::string macroname = macrostr.substr(0, std::min(eq,par)); if (dui.undefined.find(macroname) != dui.undefined.end()) continue; const std::string lhs(macrostr.substr(0,eq)); const std::string rhs(eq==std::string::npos ? std::string("1") : macrostr.substr(eq+1)); const Macro macro(lhs, rhs, files); macros.insert(std::pair(macro.name(), macro)); } // TRUE => code in current #if block should be kept // ELSE_IS_TRUE => code in current #if block should be dropped. the code in the #else should be kept. // ALWAYS_FALSE => drop all code in #if and #else enum IfState { TRUE, ELSE_IS_TRUE, ALWAYS_FALSE }; std::stack ifstates; ifstates.push(TRUE); std::list includes; std::stack includetokenstack; std::set pragmaOnce; for (const Token *rawtok = rawtokens.cbegin(); rawtok || !includetokenstack.empty();) { if (rawtok == NULL) { rawtok = includetokenstack.top(); includetokenstack.pop(); continue; } if (rawtok->op == '#' && !sameline(rawtok->previous, rawtok)) { rawtok = rawtok->next; if (!rawtok || !rawtok->name) continue; if (ifstates.top() == TRUE && (rawtok->str == ERROR || rawtok->str == WARNING)) { if (outputList) { simplecpp::Output err(rawtok->location.files); err.type = rawtok->str == ERROR ? Output::ERROR : Output::WARNING; err.location = rawtok->location; for (const Token *tok = rawtok->next; tok && sameline(rawtok,tok); tok = tok->next) { if (!err.msg.empty() && std::isalnum(tok->str[0])) err.msg += ' '; err.msg += tok->str; } err.msg = '#' + rawtok->str + ' ' + err.msg; outputList->push_back(err); } output.clear(); return; } if (rawtok->str == DEFINE) { if (ifstates.top() != TRUE) continue; try { const Macro ¯o = Macro(rawtok->previous, files); if (dui.undefined.find(macro.name()) == dui.undefined.end()) { std::map::iterator it = macros.find(macro.name()); if (it == macros.end()) macros.insert(std::pair(macro.name(), macro)); else it->second = macro; } } catch (const std::runtime_error &) { } } else if (ifstates.top() == TRUE && rawtok->str == INCLUDE) { const bool systemheader = (rawtok->next->str[0] == '<'); const std::string header(rawtok->next->str.substr(1U, rawtok->next->str.size() - 2U)); const std::string header2 = getFileName(filedata, rawtok->location.file(), header, dui, systemheader); if (!header2.empty() && pragmaOnce.find(header2) == pragmaOnce.end()) { includetokenstack.push(gotoNextLine(rawtok)); const TokenList *includetokens = filedata.find(header2)->second; rawtok = includetokens ? includetokens->cbegin() : 0; continue; } else { simplecpp::Output output(files); output.type = Output::MISSING_INCLUDE; output.location = rawtok->location; output.msg = "Header not found: " + rawtok->next->str; if (outputList) outputList->push_back(output); } } else if (rawtok->str == IF || rawtok->str == IFDEF || rawtok->str == IFNDEF || rawtok->str == ELIF) { bool conditionIsTrue; if (ifstates.top() == ALWAYS_FALSE || (ifstates.top() == ELSE_IS_TRUE && rawtok->str != ELIF)) conditionIsTrue = false; else if (rawtok->str == IFDEF) conditionIsTrue = (macros.find(rawtok->next->str) != macros.end()); else if (rawtok->str == IFNDEF) conditionIsTrue = (macros.find(rawtok->next->str) == macros.end()); else { /*if (rawtok->str == IF || rawtok->str == ELIF)*/ TokenList expr(files); for (const Token *tok = rawtok->next; tok && tok->location.sameline(rawtok->location); tok = tok->next) { if (!tok->name) { expr.push_back(new Token(*tok)); continue; } if (tok->str == DEFINED) { tok = tok->next; const bool par = (tok && tok->op == '('); if (par) tok = tok->next; if (!tok) break; if (macros.find(tok->str) != macros.end()) expr.push_back(new Token("1", tok->location)); else expr.push_back(new Token("0", tok->location)); if (tok && par) tok = tok->next; continue; } const std::map::const_iterator it = macros.find(tok->str); if (it != macros.end()) { TokenList value(files); try { it->second.expand(&value, tok, macros, files); } catch (Macro::Error &err) { Output out(rawtok->location.files); out.type = Output::ERROR; out.location = err.location; out.msg = "failed to expand \'" + tok->str + "\', " + err.what; if (outputList) outputList->push_back(out); output.clear(); return; } expr.takeTokens(value); } else { expr.push_back(new Token(*tok)); } } try { conditionIsTrue = (evaluate(expr, sizeOfType) != 0); } catch (const std::exception &) { Output out(rawtok->location.files); out.type = Output::ERROR; out.location = rawtok->location; out.msg = "failed to evaluate " + std::string(rawtok->str == IF ? "#if" : "#elif") + " condition"; if (outputList) outputList->push_back(out); output.clear(); return; } } if (rawtok->str != ELIF) { // push a new ifstate.. if (ifstates.top() != TRUE) ifstates.push(ALWAYS_FALSE); else ifstates.push(conditionIsTrue ? TRUE : ELSE_IS_TRUE); } else if (ifstates.top() == TRUE) { ifstates.top() = ALWAYS_FALSE; } else if (ifstates.top() == ELSE_IS_TRUE && conditionIsTrue) { ifstates.top() = TRUE; } } else if (rawtok->str == ELSE) { ifstates.top() = (ifstates.top() == ELSE_IS_TRUE) ? TRUE : ALWAYS_FALSE; } else if (rawtok->str == ENDIF) { if (ifstates.size() > 1U) ifstates.pop(); } else if (rawtok->str == UNDEF) { if (ifstates.top() == TRUE) { const Token *tok = rawtok->next; while (sameline(rawtok,tok) && tok->comment) tok = tok->next; if (sameline(rawtok, tok)) macros.erase(tok->str); } } else if (ifstates.top() == TRUE && rawtok->str == PRAGMA && rawtok->next && rawtok->next->str == ONCE && sameline(rawtok,rawtok->next)) { pragmaOnce.insert(rawtok->location.file()); } rawtok = gotoNextLine(rawtok); continue; } if (ifstates.top() != TRUE) { // drop code rawtok = gotoNextLine(rawtok); continue; } if (macros.find(rawtok->str) != macros.end()) { std::map::const_iterator macro = macros.find(rawtok->str); if (macro != macros.end()) { try { rawtok = macro->second.expand(&output, rawtok, macros, files); } catch (const simplecpp::Macro::Error &err) { Output out(err.location.files); out.type = Output::ERROR; out.location = err.location; out.msg = err.what; if (outputList) outputList->push_back(out); output.clear(); return; } continue; } } if (!rawtok->comment) output.push_back(new Token(*rawtok)); rawtok = rawtok->next; } if (macroUsage) { for (std::map::const_iterator macroIt = macros.begin(); macroIt != macros.end(); ++macroIt) { const Macro ¯o = macroIt->second; const std::list &usage = macro.usage(); for (std::list::const_iterator usageIt = usage.begin(); usageIt != usage.end(); ++usageIt) { struct MacroUsage mu(usageIt->files); mu.macroName = macro.name(); mu.macroLocation = macro.defineLocation(); mu.useLocation = *usageIt; macroUsage->push_back(mu); } } } }