From 75fbe310fffe781f8c788d28b76ccd538765181e Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Wed, 4 Jan 2012 12:55:51 +0100 Subject: [PATCH] Extract various functions inside the Tokenizer class and fix tokenization of 'return __LINE__ ;'. --- lib/tokenize.cpp | 153 ++++++++++++++++++++++++++---------------- lib/tokenize.h | 12 ++++ test/testtokenize.cpp | 5 ++ 3 files changed, 111 insertions(+), 59 deletions(-) diff --git a/lib/tokenize.cpp b/lib/tokenize.cpp index 8b240d15c..6626f182f 100644 --- a/lib/tokenize.cpp +++ b/lib/tokenize.cpp @@ -1960,78 +1960,31 @@ bool Tokenizer::tokenize(std::istream &code, } // replace inline SQL with "asm()" (Oracle PRO*C). Ticket: #1959 - for (Token *tok = _tokens; tok; tok = tok->next()) { - if (Token::simpleMatch(tok, "EXEC SQL")) { - // delete all tokens until ";" - const Token *end = tok->tokAt(2); - while (end && end->str() != ";") - end = end->next(); - - std::string instruction = tok->stringify(end); - Token::eraseTokens(tok, end); - - // insert "asm ( "instruction" ) ;" - tok->str("asm"); - // it can happen that 'end' is NULL when wrong code is inserted - if (!tok->next()) - tok->insertToken(";"); - tok->insertToken(")"); - tok->insertToken("\"" + instruction + "\""); - tok->insertToken("("); - // jump to ';' and continue - tok = tok->tokAt(3); - } - } + simplifySQL(); // Simplify JAVA/C# code if (isJavaOrCSharp()) simplifyJavaAndCSharp(); + // Concatenate double sharp: 'a ## b' -> 'ab' + concatenateDoubleSharp(); + if (!createLinks()) { // Source has syntax errors, can't proceed return false; } - //easy simplifications... 
- for (Token *tok = _tokens; tok; tok = tok->next()) { + // replace __LINE__ macro with line number + simplifyLineMacro(); - // replace __LINE__ macro with line number - if (tok->str() == "__LINE__") - tok->str(MathLib::toString(tok->linenr())); + // replace 'NULL' and similar '0'-defined macros with '0' + simplifyNull(); - // 'double sharp' token concatenation - // TODO: pattern should be "%var%|%num% ## %var%|%num%" - while (Token::Match(tok, "%any% ## %any%") && - (tok->isName() || tok->isNumber()) && - (tok->tokAt(2)->isName() || tok->tokAt(2)->isNumber())) { - tok->str(tok->str() + tok->strAt(2)); - tok->deleteNext(2); - } - - //Replace NULL with 0.. - if (tok->str() == "NULL" || tok->str() == "__null" || - tok->str() == "'\\0'" || tok->str() == "'\\x0'") { - tok->str("0"); - } else if (tok->isNumber() && - MathLib::isInt(tok->str()) && - MathLib::toLongNumber(tok->str()) == 0) { - tok->str("0"); - } - - // Combine "- %num%" .. - if (Token::Match(tok, "?|:|,|(|[|=|return|case|sizeof|%op% - %num%")) { - tok->deleteNext(); - tok->next()->str("-" + tok->next()->str()); - } - - // simplify round "(" parenthesis between "[;{}] and "{" - if (Token::Match(tok, "[;{}] ( {") && - Token::simpleMatch(tok->linkAt(2), "} ) ;")) { - tok->linkAt(2)->previous()->deleteNext(2); - tok->deleteNext(2); - } - } + // combine "- %num%" + concatenateNegativeNumber(); + // simplify weird but legal code: "[;{}] ( { code; } ) ;"->"[;{}] code;" + simplifyRoundCurlyParenthesis(); // Convert K&R function declarations to modern C simplifyVarDecl(true); @@ -2446,6 +2399,88 @@ bool Tokenizer::hasEnumsWithTypedef() return false; } +void Tokenizer::concatenateDoubleSharp() +{ + for (Token *tok = _tokens; tok; tok = tok->next()) { + // TODO: pattern should be "%var%|%num% ## %var%|%num%" + while (Token::Match(tok, "%any% ## %any%") && + (tok->isName() || tok->isNumber()) && + (tok->tokAt(2)->isName() || tok->tokAt(2)->isNumber())) { + tok->str(tok->str() + tok->strAt(2)); + tok->deleteNext(2); 
+ } + } +} + +void Tokenizer::simplifyLineMacro() +{ + for (Token *tok = _tokens; tok; tok = tok->next()) { + if (tok->str() == "__LINE__") + tok->str(MathLib::toString(tok->linenr())); + } +} + +void Tokenizer::simplifyNull() +{ + for (Token *tok = _tokens; tok; tok = tok->next()) { + if (tok->str() == "NULL" || tok->str() == "__null" || + tok->str() == "'\\0'" || tok->str() == "'\\x0'") { + tok->str("0"); + } else if (tok->isNumber() && + MathLib::isInt(tok->str()) && + MathLib::toLongNumber(tok->str()) == 0) { + tok->str("0"); + } + } +} + +void Tokenizer::concatenateNegativeNumber() +{ + for (Token *tok = _tokens; tok; tok = tok->next()) { + if (Token::Match(tok, "?|:|,|(|[|=|return|case|sizeof|%op% - %num%")) { + tok->deleteNext(); + tok->next()->str("-" + tok->next()->str()); + } + } +} + +void Tokenizer::simplifyRoundCurlyParenthesis() +{ + for (Token *tok = _tokens; tok; tok = tok->next()) { + while (Token::Match(tok, "[;{}] ( {") && + Token::simpleMatch(tok->linkAt(2), "} ) ;")) { + tok->linkAt(2)->previous()->deleteNext(3); + tok->deleteNext(2); + } + } +} + +void Tokenizer::simplifySQL() +{ + for (Token *tok = _tokens; tok; tok = tok->next()) { + if (Token::simpleMatch(tok, "EXEC SQL")) { + const Token *end = tok->tokAt(2); + while (end && end->str() != ";") + end = end->next(); + + std::string instruction = tok->stringify(end); + // delete all tokens until ';' + Token::eraseTokens(tok, end); + + // insert "asm ( "instruction" ) ;" + tok->str("asm"); + // it can happen that 'end' is NULL when wrong code is inserted + if (!tok->next()) + tok->insertToken(";"); + tok->insertToken(")"); + tok->insertToken("\"" + instruction + "\""); + tok->insertToken("("); + // jump to ';' and continue + tok = tok->tokAt(3); + } + } +} + void Tokenizer::simplifyDebugNew() { // convert Microsoft DEBUG_NEW macro to new diff --git a/lib/tokenize.h b/lib/tokenize.h index 06300c85b..7ed77632a 100644 --- a/lib/tokenize.h +++ b/lib/tokenize.h @@ -481,8 +481,20 @@ public: void 
simplifyParameterVoid(); + void concatenateDoubleSharp(); + + void simplifyLineMacro(); + + void simplifyNull(); + + void concatenateNegativeNumber(); + + void simplifyRoundCurlyParenthesis(); + void simplifyDebugNew(); + void simplifySQL(); + bool hasEnumsWithTypedef(); void simplifyDefaultAndDeleteInsideClass(); diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp index b1b3d4fca..418169821 100644 --- a/test/testtokenize.cpp +++ b/test/testtokenize.cpp @@ -54,6 +54,7 @@ private: TEST_CASE(tokenize20); // replace C99 _Bool => bool TEST_CASE(tokenize21); // tokenize 0x0E-7 TEST_CASE(tokenize22); // special marker $ from preprocessor + TEST_CASE(tokenize23); // tokenize "return - __LINE__;" // don't freak out when the syntax is wrong TEST_CASE(wrong_syntax); @@ -611,6 +612,10 @@ private: ASSERT_EQUALS("a b", tokenizeAndStringify("a$b")); } + void tokenize23() { // tokenize 'return - __LINE__' correctly + ASSERT_EQUALS("return -1 ;", tokenizeAndStringify("return - __LINE__;")); + } + void wrong_syntax() { { errout.str("");