From fa8e5cd7d8f2d416a0d1096b0241fc35cc2a24e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?=
Date: Sat, 15 Dec 2012 20:21:09 +0100
Subject: [PATCH] AST: Simple AST. Not used in the clients. Only part of the testing for now.

---
Review notes (this section is ignored when the patch is applied):
 - Token::astOperand1/astOperand2 now ignore a NULL operand; createAst()
   passes tok->next()/tok->previous(), which are NULL at the list ends.
 - Token::astHandleParenthesis() now returns early when there is no
   neighbouring token or when the walk ends at the token itself ("()").
 - TokenList::createAst() now returns early for an empty list.
 - Hunk line counts and the diffstat were updated for the added lines; the
   blob ids on the "index" lines were NOT regenerated.
 - Line breaks and indentation were re-established from the diff markers;
   verify context whitespace against the target tree before applying.

 lib/token.cpp         | 73 +++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/token.h           | 40 +++++++++++++++++++++++++++
 lib/tokenlist.cpp     | 61 ++++++++++++++++++++++++++++++++++++++++++
 lib/tokenlist.h       |  3 ++
 test/testtokenize.cpp | 44 ++++++++++++++++++++++++++++++
 5 files changed, 220 insertions(+), 1 deletion(-)

diff --git a/lib/token.cpp b/lib/token.cpp
index cc2c1ad51..efd3f2331 100644
--- a/lib/token.cpp
+++ b/lib/token.cpp
@@ -46,7 +46,10 @@ Token::Token(Token **t) :
     _isLong(false),
     _isUnused(false),
     _isStandardType(false),
-    _isExpandedMacro(false)
+    _isExpandedMacro(false),
+    _astOperand1(NULL),
+    _astOperand2(NULL),
+    _astParent(NULL)
 {
 }
 
@@ -1045,3 +1048,71 @@ std::string Token::stringifyList(bool varid) const
 {
     return stringifyList(varid, false, true, true, true, 0, 0);
 }
+
+/**
+ * Make \a tok the first operand of this token.
+ * If \a tok already has an AST parent, this token is installed as that
+ * parent's second operand and adopts that parent as its own.
+ * A NULL \a tok (no neighbouring token) is ignored.
+ */
+void Token::astOperand1(Token *tok)
+{
+    if (!tok)
+        return;
+    if (tok->_astParent) {
+        tok->_astParent->_astOperand2 = this;
+        _astParent = tok->_astParent;
+    }
+    tok->_astParent = this;
+    _astOperand1 = tok;
+}
+
+/**
+ * Make \a tok the second operand of this token.
+ * If \a tok already has an AST parent, this token is installed as that
+ * parent's first operand and adopts that parent as its own.
+ * A NULL \a tok (no neighbouring token) is ignored.
+ */
+void Token::astOperand2(Token *tok)
+{
+    if (!tok)
+        return;
+    if (tok->_astParent) {
+        tok->_astParent->_astOperand1 = this;
+        _astParent = tok->_astParent;
+    }
+    _astOperand2 = tok;
+    tok->_astParent = this;
+}
+
+/**
+ * Connect the expression next to a parenthesis/bracket token to the AST.
+ * Starting from the next token (for "(") or the previous token (otherwise),
+ * the parent links are followed to the top of that expression. If this token
+ * has a parent, that top node replaces an operand of the parent; otherwise
+ * the top node becomes the parent of this token.
+ */
+void Token::astHandleParenthesis()
+{
+    Token *innerTop = (_str == "(") ? _next : _previous;
+    // no neighbouring token: nothing to connect
+    if (!innerTop)
+        return;
+    while (innerTop->_astParent)
+        innerTop = innerTop->_astParent;
+    // e.g. "()": the walk ends at this token; linking it to itself would
+    // make astTop() loop forever
+    if (innerTop == this)
+        return;
+
+    if (_astParent) {
+        if (_str == "(")
+            _astParent->_astOperand2 = innerTop;
+        else
+            _astParent->_astOperand1 = innerTop;
+        innerTop->_astParent = _astParent;
+    } else {
+        _astParent = innerTop;
+    }
+}
+
diff --git a/lib/token.h b/lib/token.h
index 1745af192..cc073824f 100644
--- a/lib/token.h
+++ b/lib/token.h
@@ -542,6 +542,46 @@ private:
 
     /** Update internal property cache about isStandardType() */
     void update_property_isStandardType();
+
+    // AST..
+    Token *_astOperand1;   // first operand, NULL if none
+    Token *_astOperand2;   // second operand, NULL if none
+    Token *_astParent;     // parent node, NULL if none
+public:
+    /** Set the first operand. A NULL tok is ignored. */
+    void astOperand1(Token *tok);
+    /** Set the second operand. A NULL tok is ignored. */
+    void astOperand2(Token *tok);
+    /** Connect the expression next to a "(", ")" or "]" token to the AST */
+    void astHandleParenthesis();
+    /** Declared only; this patch contains no definition */
+    void astHandleBrackets();
+
+    /** @return first operand or NULL */
+    const Token * astOperand1() const {
+        return _astOperand1;
+    }
+    /** @return second operand or NULL */
+    const Token * astOperand2() const {
+        return _astOperand2;
+    }
+    /** @return topmost ancestor; this token if it has no parent */
+    const Token *astTop() const {
+        const Token *ret = this;
+        while (ret->_astParent)
+            ret = ret->_astParent;
+        return ret;
+    }
+
+    /** @return operand1 subtree, operand2 subtree, then this token's string (postfix order) */
+    std::string astString() const {
+        std::string ret;
+        if (_astOperand1)
+            ret = _astOperand1->astString();
+        if (_astOperand2)
+            ret += _astOperand2->astString();
+        return ret+_str;
+    }
 };
 
 /// @}
diff --git a/lib/tokenlist.cpp b/lib/tokenlist.cpp
index d2c9c4948..6b6740ff5 100644
--- a/lib/tokenlist.cpp
+++ b/lib/tokenlist.cpp
@@ -339,6 +339,67 @@ bool TokenList::createTokens(std::istream &code, const std::string& file0)
 
 //---------------------------------------------------------------------------
 
+/**
+ * Build a simple AST by linking the tokens in this list to each other.
+ * The token list is scanned once per row of the operator table below and
+ * finally once more for "(", ")" and "]" tokens.
+ */
+void TokenList::createAst()
+{
+    // empty list: the backwards scan below would dereference a NULL _front
+    if (!_front)
+        return;
+
+    // operators that must be ordered according to C-precedence
+    const char * const operators[] = {
+        " :: ",
+        " [ . ++ -- ",
+        "> ++ -- + - ! ~ * & ", // prefix unary operators, from right to left
+        " || ",
+        " && ",
+        " | ",
+        " ^ ",
+        " & ",
+        " == != ",
+        " < <= > >= ",
+        " << >> ",
+        " + - ",
+        " * / % "
+    };
+
+    for (unsigned int i = 0; i < sizeof(operators) / sizeof(*operators); ++i) {
+        // TODO: extract operators to std::set - that should be faster
+        if (*operators[i] == '>') {
+            // rows marked with '>' are scanned from the last token to the first
+            const std::string op(1+operators[i]);
+            Token *tok = _front;
+            while (tok->next())
+                tok = tok->next();
+            for (; tok; tok = tok->previous()) {
+                if ((!tok->previous() || tok->previous()->isOp()) &&
+                    op.find(" "+tok->str()+" ")!=std::string::npos) {
+                    tok->astOperand1(tok->next());
+                }
+            }
+        } else {
+            const std::string op(operators[i]);
+            for (Token *tok = _front; tok; tok = tok->next()) {
+                if (tok->astOperand1()==NULL && op.find(" "+tok->str()+" ")!=std::string::npos) {
+                    tok->astOperand1(tok->previous());
+                    tok->astOperand2(tok->next());
+                }
+            }
+        }
+    }
+
+    // parentheses..
+    for (Token *tok = _front; tok; tok = tok->next()) {
+        if (tok->str() == "(" || tok->str() == ")" || tok->str() == "]") {
+            tok->astHandleParenthesis();
+        }
+    }
+}
+
 const std::string& TokenList::file(const Token *tok) const
 {
     return _files.at(tok->fileIndex());
diff --git a/lib/tokenlist.h b/lib/tokenlist.h
index 2821f77a5..df56a83de 100644
--- a/lib/tokenlist.h
+++ b/lib/tokenlist.h
@@ -105,6 +105,9 @@ public:
      */
     std::string fileLine(const Token *tok) const;
 
+    /** Create a simple AST for the tokens in this list */
+    void createAst();
+
 private:
     /** Disable copy constructor, no implementation */
     TokenList(const TokenList &);
diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp
index 149c8f65f..1b0fae3ef 100644
--- a/test/testtokenize.cpp
+++ b/test/testtokenize.cpp
@@ -459,6 +459,12 @@ private:
         TEST_CASE(platformUnix64);
 
         TEST_CASE(simplifyMathExpressions); //ticket #1620
+
+        // AST data
+        TEST_CASE(astexpr);
+        TEST_CASE(astpar);
+        TEST_CASE(astbrackets);
+        TEST_CASE(astunaryop);
     }
 
     std::string tokenizeAndStringify(const char code[], bool simplify = false, bool expand = true, Settings::PlatformType platform = Settings::Unspecified, const char* filename = "test.cpp", bool cpp11 = true) {
@@ -7619,6 +7625,44 @@ private:
         ASSERT_EQUALS(expected1, tokenizeAndStringify(code1));
 
     }
+
+
+
+
+    /** Tokenize code, create the AST and return astString() of the tree holding the first token ("ERROR" if tokenizing fails) */
+    std::string testAst(const char code[]) {
+        // tokenize given code..
+        TokenList tokenList(NULL);
+        std::istringstream istr(code);
+        if (!tokenList.createTokens(istr,"test.cpp"))
+            return "ERROR";
+
+        // Create AST..
+        tokenList.createAst();
+
+        return tokenList.front()->astTop()->astString();
+    }
+
+    void astexpr() {
+        ASSERT_EQUALS("123++", testAst("1+2+3"));
+        ASSERT_EQUALS("12*3+", testAst("1*2+3"));
+        ASSERT_EQUALS("123*+", testAst("1+2*3"));
+        ASSERT_EQUALS("12*34*+", testAst("1*2+3*4"));
+    }
+
+    void astpar() {
+        ASSERT_EQUALS("12+3*", testAst("(1+2)*3"));
+        ASSERT_EQUALS("123+*", testAst("1*(2+3)"));
+        ASSERT_EQUALS("123+*4*", testAst("1*(2+3)*4"));
+    }
+
+    void astbrackets() {
+        ASSERT_EQUALS("123+[4+", testAst("1[2+3]+4"));
+    }
+
+    void astunaryop() {
+        ASSERT_EQUALS("12-+", testAst("1+-2"));
+    }
 };
 
 REGISTER_TEST(TestTokenizer)