From de29991c1146328713c724d1a98c00dab68e4bec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Mon, 4 Nov 2013 11:26:16 +0100 Subject: [PATCH] AST: Rewrite using standard approach --- lib/tokenlist.cpp | 294 ++++++++++++++++++++++++++++++++---------- lib/tokenlist.h | 2 +- test/testtokenize.cpp | 20 ++- 3 files changed, 244 insertions(+), 72 deletions(-) diff --git a/lib/tokenlist.cpp b/lib/tokenlist.cpp index ffcbe41e9..a014fa565 100644 --- a/lib/tokenlist.cpp +++ b/lib/tokenlist.cpp @@ -357,82 +357,246 @@ bool TokenList::createTokens(std::istream &code, const std::string& file0) //--------------------------------------------------------------------------- -void TokenList::createAst() const +static void compileUnaryOp(Token *&tok, void (*f)(Token *&, std::stack &), std::stack &op) { - // operators that must be ordered according to C-precedence - const char * const operators[] = { - " :: ", - " ++ -- . ", - "> ++ -- + - ! ~ * & sizeof ", // prefix unary operators, from right to left - " * / % ", - " + - ", - " << >> ", - " < <= > >= ", - " == != ", - " & ", - " ^ ", - " | ", - " && ", - " || ", - " = ? : ", - " throw ", - " , " - " [ " - }; + Token *unaryop = tok; + tok = tok->next(); + f(tok,op); - // No tokens => bail out - if (!_front) - return; + if (!op.empty()) { + unaryop->astOperand1(op.top()); + op.pop(); + } + op.push(unaryop); +} - for (unsigned int i = 0; i < sizeof(operators) / sizeof(*operators); ++i) { - // TODO: extract operators to std::set - that should be faster - if (*operators[i] == '>') { // Unary operators, parse from right to left - const std::string op(1+operators[i]); - Token *tok = _front; - while (tok->next()) +static void compileBinOp(Token *&tok, void (*f)(Token *&, std::stack &), std::stack &op) +{ + Token *binop = tok; + tok = tok->next(); + f(tok,op); + + // TODO: Should we check if op is empty. + // * Is it better to add assertion that it isn't? + // * Write debug warning if it's empty? + if (!op.empty()) { + binop->astOperand2(op.top()); + op.pop(); + } + if (!op.empty()) { + binop->astOperand1(op.top()); + op.pop(); + } + op.push(binop); +} + +static void compileExpression(Token *&tok, std::stack &op); + +static void compileTerm(Token *& tok, std::stack &op) +{ + if (tok->isLiteral()) { + op.push(tok); + tok = tok->next(); + } else if (tok->isName()) { + if (Token::Match(tok->next(), "++|--")) { // post increment / decrement + tok = tok->next(); + tok->astOperand1(tok->previous()); + op.push(tok); + tok = tok->next(); + } else if (!Token::Match(tok->next(), "(|[")) { + op.push(tok); + tok = tok->next(); + } else { + Token *name = tok; + tok = tok->tokAt(2); + if (Token::Match(tok, ")|]")) { + name->next()->astOperand1(name); tok = tok->next(); - for (; tok; tok = tok->previous()) { - if (tok->isOp() && - (!tok->previous() || - tok->previous()->isOp() || - tok->previous()->str() == "," || - tok->previous()->type() == Token::eOther) && - op.find(" "+tok->str()+" ")!=std::string::npos) { - tok->astOperand1(tok->next()); - } - } - } else { // parse from left to right - const std::string op(operators[i]); - for (Token *tok = _front; tok; tok = tok->next()) { - if (tok->astOperand1()==NULL && op.find(" "+tok->str()+" ")!=std::string::npos) { - // Don't create AST for "..." - if (tok->str() == "." && (tok->previous()->str() == "." || tok->next()->str() == ".")) - continue; - - if (Token::Match(tok, "* [)]]")) - continue; - - if (tok->type() != Token::eIncDecOp) { - tok->astOperand1(tok->previous()); - tok->astOperand2(tok->next()); - } else if (tok->previous() && !tok->previous()->isOp()) { - tok->astOperand1(tok->previous()); - } + } else { + compileExpression(tok,op); + tok = tok->next(); // skip ')' or ']' + if (!op.empty()) { + name->next()->astOperand2(op.top()); + op.pop(); } + name->next()->astOperand1(name); } + op.push(name->next()); } + } else if (Token::Match(tok, "+|-|~|*|&|!")) { + compileUnaryOp(tok, compileExpression, op); + } else if (Token::Match(tok, "++|--")) { + if (!op.empty() && op.top()->isOp()) { + // post increment/decrement + tok->astOperand1(op.top()); + op.pop(); + op.push(tok); + tok = tok->next(); + } else { + // pre increment/decrement + compileUnaryOp(tok, compileExpression, op); + } + } else if (tok->str() == "(") { + // Parenthesized sub-expression + tok = tok->next(); + compileExpression(tok,op); + tok = tok->next(); } +} - // function calls.. - for (Token *tok = _front; tok; tok = tok->next()) { - if (Token::Match(tok, "%var% (")) - tok->astFunctionCall(); +static void compileScope(Token *&tok, std::stack &op) +{ + compileTerm(tok,op); + while (tok) { + if (tok->str() == "::") { + compileBinOp(tok, compileTerm, op); + } else break; } +} - // parentheses.. - for (Token *tok = _front; tok; tok = tok->next()) { - if (Token::Match(tok, "(|)|]")) { - tok->astHandleParentheses(); +static void compileDot(Token *&tok, std::stack &op) +{ + compileScope(tok,op); + while (tok) { + if (tok->str() == ".") { + compileBinOp(tok, compileScope, op); + } else break; + } +} + +static void compileMulDiv(Token *&tok, std::stack &op) +{ + compileDot(tok,op); + while (tok) { + if (Token::Match(tok, "[*/%]")) { + if (Token::Match(tok, "* [,)]")) + break; + compileBinOp(tok, compileDot, op); + } else break; + } +} + +static void compileAddSub(Token *&tok, std::stack &op) +{ + compileMulDiv(tok,op); + while (tok) { + if (Token::Match(tok, "+|-")) { + compileBinOp(tok, compileMulDiv, op); + } else break; + } +} + +static void compileShift(Token *&tok, std::stack &op) +{ + compileAddSub(tok,op); + while (tok) { + if (Token::Match(tok, "<<|>>")) { + compileBinOp(tok, compileAddSub, op); + } else break; + } +} + +static void compileRelComp(Token *&tok, std::stack &op) +{ + compileShift(tok,op); + while (tok) { + if (Token::Match(tok, "<|<=|>=|>")) { + compileBinOp(tok, compileShift, op); + } else break; + } +} + +static void compileEqComp(Token *&tok, std::stack &op) +{ + compileRelComp(tok,op); + while (tok) { + if (Token::Match(tok, "==|!=")) { + compileBinOp(tok, compileRelComp, op); + } else break; + } +} + +static void compileAnd(Token *&tok, std::stack &op) +{ + compileEqComp(tok,op); + while (tok) { + if (tok->str() == "&") { + compileBinOp(tok, compileEqComp, op); + } else break; + } +} + +static void compileXor(Token *&tok, std::stack &op) +{ + compileAnd(tok,op); + while (tok) { + if (tok->str() == "^") { + compileBinOp(tok, compileAnd, op); + } else break; + } +} + +static void compileOr(Token *&tok, std::stack &op) +{ + compileXor(tok,op); + while (tok) { + if (tok->str() == "|") { + compileBinOp(tok, compileXor, op); + } else break; + } +} + +static void compileLogicAnd(Token *&tok, std::stack &op) +{ + compileOr(tok,op); + while (tok) { + if (tok->str() == "&&") { + compileBinOp(tok, compileOr, op); + } else break; + } +} + +static void compileLogicOr(Token *&tok, std::stack &op) +{ + compileLogicAnd(tok,op); + while (tok) { + if (tok->str() == "||") { + compileBinOp(tok, compileLogicAnd, op); + } else break; + } +} + +static void compileAssign(Token *&tok, std::stack &op) +{ + compileLogicOr(tok,op); + while (tok) { + if (tok->str() == "=") { + compileBinOp(tok, compileLogicOr, op); + } else break; + } +} + +static void compileComma(Token *&tok, std::stack &op) +{ + compileAssign(tok,op); + while (tok) { + if (tok->str() == ",") { + compileBinOp(tok, compileAssign, op); + } else break; + } +} + +static void compileExpression(Token *&tok, std::stack &op) +{ + compileComma(tok,op); +} + +void TokenList::createAst() +{ + for (Token *tok = _front; tok; tok = tok ? tok->next() : NULL) { + if (!tok->previous() || Token::Match(tok, "%var% (|[|.|=")) { + std::stack operands; + compileExpression(tok, operands); } } } diff --git a/lib/tokenlist.h b/lib/tokenlist.h index 022435ee4..7cf0e0e91 100644 --- a/lib/tokenlist.h +++ b/lib/tokenlist.h @@ -104,7 +104,7 @@ public: */ std::string fileLine(const Token *tok) const; - void createAst() const; + void createAst(); private: /** Disable copy constructor, no implementation */ diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp index e0d2a8f99..cfc9664ac 100644 --- a/test/testtokenize.cpp +++ b/test/testtokenize.cpp @@ -9962,17 +9962,21 @@ private: ASSERT_EQUALS("abc+=", testAst("a=b+c")); ASSERT_EQUALS("abc=,", testAst("a,b=c")); + ASSERT_EQUALS("a\"\"=", testAst("a=\"\"")); + ASSERT_EQUALS("a\'\'=", testAst("a=\'\'")); + testAst("char a[1]=\"\";"); // don't crash + testAst("int f(char argv[]);"); // don't crash } void astpar() const { // parentheses ASSERT_EQUALS("12+3*", testAst("(1+2)*3")); ASSERT_EQUALS("123+*", testAst("1*(2+3)")); ASSERT_EQUALS("123+*4*", testAst("1*(2+3)*4")); - ASSERT_EQUALS("ab.c&d==if", testAst("if((a.b&c)==d){}")); + ASSERT_EQUALS("ifab.c&d==(", testAst("if((a.b&c)==d){}")); } void astbrackets() const { // [] - ASSERT_EQUALS("123+[4+", testAst("1[2+3]+4")); + ASSERT_EQUALS("a23+[4+", testAst("a[2+3]+4")); } void astunaryop() const { // unary operators @@ -9982,10 +9986,14 @@ private: } void astfunction() const { // function calls - ASSERT_EQUALS("1(f+2+", testAst("1+f()+2")); - ASSERT_EQUALS("12f+3+", testAst("1+f(2)+3")); - ASSERT_EQUALS("123,f+4+", testAst("1+f(2,3)+4")); - ASSERT_EQUALS("12a&,f+", testAst("1+f(2,&a)")); + ASSERT_EQUALS("1f(+2+", testAst("1+f()+2")); + ASSERT_EQUALS("1f2(+3+", testAst("1+f(2)+3")); + ASSERT_EQUALS("1f23,(+4+", testAst("1+f(2,3)+4")); + ASSERT_EQUALS("1f2a&,(+", testAst("1+f(2,&a)")); + testAst("extern unsigned f(const char *);"); // don't crash + testAst("extern void f(const char *format, ...);"); // don't crash + testAst("extern int for_each_commit_graft(int (*)(int*), void *);"); // don't crash + testAst("for (;;) {}"); // don't crash } void asttemplate() const { // uninstantiated templates will have <,>,etc.. how do we handle them?