From 9f81b48dc13423fd220a6d71303958e4cb3d86c4 Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Mon, 30 Jan 2012 23:41:43 +0100 Subject: [PATCH] Fixed ticket #3558 (Tokenizer: improve simplifyVarDecl to take count of undefined size VLA's). --- lib/tokenize.cpp | 190 ++++++++++++++++++++---------------------- test/testtokenize.cpp | 20 +++++ 2 files changed, 109 insertions(+), 101 deletions(-) diff --git a/lib/tokenize.cpp b/lib/tokenize.cpp index babbbd56a..ea8faed1e 100644 --- a/lib/tokenize.cpp +++ b/lib/tokenize.cpp @@ -2005,11 +2005,6 @@ bool Tokenizer::tokenize(std::istream &code, // simplify weird but legal code: "[;{}] ( { code; } ) ;"->"[;{}] code;" simplifyRoundCurlyParenthesis(); - // Convert K&R function declarations to modern C - simplifyVarDecl(true); - if (!simplifyFunctionParameters()) - return false; - // check for simple syntax errors.. for (const Token *tok = _tokens; tok; tok = tok->next()) { if (Token::simpleMatch(tok, "> struct {") && @@ -2020,9 +2015,6 @@ bool Tokenizer::tokenize(std::istream &code, } } - // specify array size.. - arraySize(); - simplifyDoWhileAddBraces(); if (!simplifyIfAddBraces()) @@ -2075,6 +2067,14 @@ bool Tokenizer::tokenize(std::istream &code, } } + // Convert K&R function declarations to modern C + simplifyVarDecl(true); + if (!simplifyFunctionParameters()) + return false; + + // specify array size.. + arraySize(); + // simplify labels and 'case|default'-like syntaxes simplifyLabelsCaseDefault(); @@ -2279,6 +2279,9 @@ bool Tokenizer::tokenize(std::istream &code, // Split up variable declarations. simplifyVarDecl(false); + // specify array size.. 
needed when arrays are split + arraySize(); + // f(x=g()) => x=g(); f(x) simplifyAssignmentInFunctionCall(); @@ -5126,10 +5129,12 @@ void Tokenizer::simplifyVarDecl(bool only_k_r_fpar) bool isconst = false; bool isstatic = false; + bool ispointer = false; Token *tok2 = type0; unsigned int typelen = 1; - while (Token::Match(tok2, "%type% %type% *| *| %var%")) { + //check if variable is declared 'const' or 'static' or both + while (Token::Match(tok2, "const|static")) { if (tok2->str() == "const") isconst = true; @@ -5140,94 +5145,27 @@ void Tokenizer::simplifyVarDecl(bool only_k_r_fpar) ++typelen; } - // Don't split up const declaration.. - if (isconst && Token::Match(tok2, "%type% %var% =")) - continue; - // strange looking variable declaration => don't split up. if (Token::Match(tok2, "%type% *| %var% , %type% *| %var%")) continue; + if (Token::Match(tok2, "struct|class %type%")) { + tok2 = tok2->next(); + ++typelen; + } + // check for qualification.. if (Token::Match(tok2, ":: %type%")) { ++typelen; tok2 = tok2->next(); } - if (Token::Match(tok2, "%type% :: %type%")) { - while (tok2 && Token::Match(tok2, "%type% ::")) { - typelen += 2; - tok2 = tok2->tokAt(2); - } - } - - if (Token::Match(tok2, "%type% *| %var% ,|=")) { - const bool isPointer = (tok2->next()->str() == "*"); - const Token *varName = tok2->tokAt((isPointer ? 
2 : 1)); - - if (varName->str() != "operator") { - tok2 = varName->next(); // The ',' or '=' token - - if (isstatic && tok2->str() == "=") { - if (Token::Match(tok2->next(), "%num% ,")) - tok2 = tok2->tokAt(2); - else - tok2 = NULL; - } - } else - tok2 = NULL; - } - - else if (Token::Match(tok2, "%type% * * %var% ,|=")) { - if (tok2->strAt(3) != "operator") - tok2 = tok2->tokAt(4); // The ',' token - else - tok2 = NULL; - } - - else if (Token::Match(tok2, "%type% * const %var% ,|=")) { - if (tok2->strAt(3) != "operator") { - tok2 = tok2->tokAt(4); // The ',' token - } else { - tok2 = NULL; - } - } - - else if (Token::Match(tok2, "%type% %var% [ %any% ] ,|=|[")) { - tok2 = tok2->tokAt(2); - if (tok2->next()->isName() || tok2->next()->isNumber()) { - tok2 = tok2->link()->next(); // The ',' token - while (Token::Match(tok2, "[ %any% ]") && - (tok2->next()->isName() || tok2->next()->isNumber())) - tok2 = tok2->link()->next(); - if (!Token::Match(tok2, "=|,")) { - tok2 = NULL; - } - - if (tok2 && tok2->str() == "=") { - while (tok2 && tok2->str() != "," && tok2->str() != ";") { - if (tok2->str() == "{") - tok2 = tok2->link(); - tok2 = tok2->next(); - } - if (tok2 && tok2->str() == ";") - tok2 = NULL; - } - } else - tok2 = NULL; - } - - else if (Token::Match(tok2, "%type% * %var% [ %any% ] ,")) { - tok2 = tok2->tokAt(3); - if (tok2->next()->isName() || tok2->next()->isNumber()) - tok2 = tok2->link()->next(); // The ',' token - else - tok2 = NULL; - } - - else if (Token::Match(tok2, "%type% <")) { + //skip combinations of templates and namespaces + while (Token::Match(tok2, "%type% <") || Token::Match(tok2, "%type% ::")) { typelen += 2; tok2 = tok2->tokAt(2); + if (tok2 && tok2->previous()->str() == "::") + continue; size_t indentlevel = 1; for (Token *tok3 = tok2; tok3; tok3 = tok3->next()) { @@ -5246,27 +5184,77 @@ void Tokenizer::simplifyVarDecl(bool only_k_r_fpar) } } - if (!tok2) // syntax error - break; - - if (Token::Match(tok2, ":: %type%")) { - typelen += 2; - 
tok2 = tok2->tokAt(2); - } - - if (!tok2) // syntax error - break; - - if (tok2->str() == "*") { + if (Token::Match(tok2, ":: %type%")) { + ++typelen; tok2 = tok2->next(); } + } - if (Token::Match(tok2, "%var% ,|=")) { - tok2 = tok2->next(); // The ',' token + //pattern: "%type% *| ... *| const| %var% ,|=" + if (Token::Match(tok2, "%type%") || + (tok2 && tok2->previous() && tok2->previous()->str() == ">")) { + Token *varName = tok2; + if (!tok2->previous() || tok2->previous()->str() != ">") + varName = varName->next(); + else --typelen; - } else { - tok2 = NULL; + //skip all the pointer part + while (varName && varName->str() == "*") { + ispointer = true; + varName = varName->next(); } + + while (Token::Match(varName, "%type% %type%")) { + if (varName->str() != "const") { + ++typelen; + } + varName = varName->next(); + } + //non-VLA case + if (Token::Match(varName, "%var% ,|=")) { + if (varName->str() != "operator") { + tok2 = varName->next(); // The ',' or '=' token + + if (tok2->str() == "=") { + if (isstatic) { + if (Token::Match(tok2->next(), "%num% ,")) + tok2 = tok2->tokAt(2); + else + tok2 = NULL; + } else if (isconst && !ispointer) { + //do not split const non-pointer variables.. 
+ while (tok2 && tok2->str() != "," && tok2->str() != ";") { + if (tok2->str() == "{" || tok2->str() == "(" || tok2->str() == "[") + tok2 = tok2->link(); + tok2 = tok2->next(); + } + if (tok2 && tok2->str() == ";") + tok2 = NULL; + } + } + } else + tok2 = NULL; + } + + //VLA case + else if (Token::Match(varName, "%var% [")) { + tok2 = varName->next(); + + while (Token::Match(tok2->link(), "] ,|=|[")) + tok2 = tok2->link()->next(); + if (!Token::Match(tok2, "=|,")) + tok2 = NULL; + if (tok2 && tok2->str() == "=") { + while (tok2 && tok2->str() != "," && tok2->str() != ";") { + if (tok2->str() == "{" || tok2->str() == "(" || tok2->str() == "[") + tok2 = tok2->link(); + tok2 = tok2->next(); + } + if (tok2 && tok2->str() == ";") + tok2 = NULL; + } + } else + tok2 = NULL; } else { tok2 = NULL; } diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp index 49edc2040..6eb1371af 100644 --- a/test/testtokenize.cpp +++ b/test/testtokenize.cpp @@ -281,6 +281,9 @@ private: TEST_CASE(vardecl12); TEST_CASE(vardecl13); TEST_CASE(vardecl14); + TEST_CASE(vardecl15); + TEST_CASE(vardecl16); + TEST_CASE(vardecl17); TEST_CASE(vardecl_stl_1); TEST_CASE(vardecl_stl_2); TEST_CASE(vardecl_template); @@ -4553,6 +4556,23 @@ private: ASSERT_EQUALS(":: std :: tr1 :: shared_ptr < int > pNum1 ; :: std :: tr1 :: shared_ptr < int > pNum2 ;", tokenizeAndStringify(code)); } + void vardecl15() { + const char code[] = "const char x[] = \"foo\", y[] = \"bar\";\n"; + ASSERT_EQUALS("const char x [ 4 ] = \"foo\" ; const char y [ 4 ] = \"bar\" ;", tokenizeAndStringify(code)); + } + + void vardecl16() { + const char code[] = "const a::b<c,d(e),f>::g::h<i>::l *x [] = foo(),y [][] = bar();\n"; + ASSERT_EQUALS("const a :: b < c , d ( e ) , f > :: g :: h < i > :: l * x [ ] = foo ( ) ; " + "const a :: b < c , d ( e ) , f > :: g :: h < i > :: l y [ ] [ ] = bar ( ) ;", tokenizeAndStringify(code)); + } + + void vardecl17() { + const char code[] = "a < b > :: c :: d :: e < f > x = foo(), y = bar();\n"; + ASSERT_EQUALS("a < 
b > :: c :: d :: e < f > x ; x = foo ( ) ; " + "a < b > :: c :: d :: e < f > y ; y = bar ( ) ;", tokenizeAndStringify(code)); + } + void volatile_variables() { const char code[] = "volatile int a=0;\n" "volatile int b=0;\n"