cppcheck/tokenize.cpp

1534 lines
42 KiB
C++
Raw Normal View History

2008-11-20 20:23:05 +01:00
/*
2008-10-26 08:55:15 +01:00
* c++check - c/c++ syntax checking
* Copyright (C) 2007 Daniel Marjamäki
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2007-05-24 07:40:45 +02:00
//---------------------------------------------------------------------------
#include "tokenize.h"
2007-05-24 07:40:45 +02:00
//---------------------------------------------------------------------------
#include <locale>
#include <fstream>
#include <string>
#include <cstring>
#include <iostream>
#include <sstream>
#include <list>
#include <algorithm>
2007-05-24 07:40:45 +02:00
#include <stdlib.h> // <- strtoul
#include <stdio.h>
2007-05-24 07:40:45 +02:00
2008-02-18 18:11:34 +01:00
#ifdef __BORLANDC__
#include <ctype.h>
2008-02-18 18:11:34 +01:00
#include <mem.h>
#endif
#ifndef _MSC_VER
#define _strdup(str) strdup(str)
#endif
2007-05-24 07:40:45 +02:00
//---------------------------------------------------------------------------
// Create a tokenizer with an empty token list and no defined symbols.
// The given error logger pointer is stored as-is.
Tokenizer::Tokenizer(ErrorLogger *errorLogger)
{
    _errorLogger = errorLogger;

    // Empty token list: neither a first nor a last token
    tokens_back = 0;
    _tokens = 0;

    // No "#define" symbols recorded yet
    dsymlist = 0;
}
// Releases the token list, the defined symbols and the file list
// by delegating to DeallocateTokens().
Tokenizer::~Tokenizer()
{
    this->DeallocateTokens();
}
//---------------------------------------------------------------------------
// Helper functions..
// Step 'index' tokens forward from 'tok' and return the token reached.
// A null pointer is returned if the list ends first. An index of zero
// (or a negative index) returns 'tok' itself.
TOKEN *Tokenizer::_gettok(TOKEN *tok, int index)
{
    for (int remaining = index; tok && remaining > 0; --remaining)
        tok = tok->next;
    return tok;
}
//---------------------------------------------------------------------------
// Read-only access to the first token of the token list.
const TOKEN *Tokenizer::tokens() const
{
    const TOKEN *first = _tokens;
    return first;
}
//---------------------------------------------------------------------------
// Defined symbols.
// "#define abc 123" will create a defined symbol "abc" with the value 123
//---------------------------------------------------------------------------
// Gives the caller direct, modifiable access to the vector of file names.
std::vector<std::string> *Tokenizer::getFiles()
{
    std::vector<std::string> *fileList = &Files;
    return fileList;
}
2007-05-24 07:40:45 +02:00
void Tokenizer::Define(const char Name[], const char Value[])
2007-05-24 07:40:45 +02:00
{
if (!(Name && Name[0]))
return;
if (!(Value && Value[0]))
return;
// Is 'Value' a decimal value..
bool dec = true, hex = true;
for (int i = 0; Value[i]; i++)
{
if ( ! isdigit(Value[i]) )
2007-05-24 07:40:45 +02:00
dec = false;
if ( ! isxdigit(Value[i]) && (!(i==1 && Value[i]=='x')))
2007-05-24 07:40:45 +02:00
hex = false;
}
if (!dec && !hex)
return;
char *strValue = _strdup(Value);
2007-05-24 07:40:45 +02:00
if (!dec && hex)
{
// Convert Value from hexadecimal to decimal
unsigned long value;
std::istringstream istr(Value+2);
istr >> std::hex >> value;
std::ostringstream ostr;
ostr << value;
2007-05-24 07:40:45 +02:00
free(strValue);
strValue = _strdup(ostr.str().c_str());
2007-05-24 07:40:45 +02:00
}
DefineSymbol *NewSym = new DefineSymbol;
memset(NewSym, 0, sizeof(DefineSymbol));
NewSym->name = _strdup(Name);
2007-05-24 07:40:45 +02:00
NewSym->value = strValue;
NewSym->next = dsymlist;
dsymlist = NewSym;
}
//---------------------------------------------------------------------------
2007-05-24 07:40:45 +02:00
//---------------------------------------------------------------------------
// addtoken
// add a token. Used by 'Tokenizer'
//---------------------------------------------------------------------------
2007-05-24 07:40:45 +02:00
void Tokenizer::addtoken(const char str[], const unsigned int lineno, const unsigned int fileno)
2007-05-24 07:40:45 +02:00
{
if (str[0] == 0)
return;
// Replace hexadecimal value with decimal
std::ostringstream str2;
if (strncmp(str,"0x",2)==0)
2007-05-24 07:40:45 +02:00
{
str2 << strtoul(str+2, NULL, 16);
2007-05-24 07:40:45 +02:00
}
else
{
str2 << str;
}
2007-05-24 07:40:45 +02:00
TOKEN *newtoken = new TOKEN;
newtoken->setstr(str2.str().c_str());
2007-05-24 07:40:45 +02:00
newtoken->linenr = lineno;
newtoken->FileIndex = fileno;
if (tokens_back)
{
tokens_back->next = newtoken;
tokens_back = newtoken;
}
else
{
_tokens = tokens_back = newtoken;
2007-05-24 07:40:45 +02:00
}
// Check if str is defined..
for (DefineSymbol *sym = dsymlist; sym; sym = sym->next)
{
if (strcmp(str,sym->name)==0)
{
newtoken->setstr(sym->value);
2007-05-24 07:40:45 +02:00
break;
}
}
}
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// combine_2tokens
// Combine two tokens that belong to each other. Ex: "<" and "=" may become "<="
//---------------------------------------------------------------------------
// If 'tok' is exactly 'str1' and the token after it is exactly 'str2', merge
// the two into one token with the text str1+str2. Otherwise nothing happens.
void Tokenizer::combine_2tokens(TOKEN *tok, const char str1[], const char str2[])
{
    if (tok == 0 || tok->next == 0)
        return;

    const bool bothMatch = (strcmp(tok->str, str1) == 0) &&
                           (strcmp(tok->next->str, str2) == 0);
    if (!bothMatch)
        return;

    std::string merged(str1);
    merged += str2;
    tok->setstr(merged.c_str());

    // The second token has been merged into the first
    DeleteNextToken(tok);
}
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// SizeOfType - gives the size of a type
//---------------------------------------------------------------------------
// Look up the size recorded for 'type' in the map 'TypeSize'.
// Returns 0 for a null pointer and for a type that has no entry.
int Tokenizer::SizeOfType(const char type[])
{
    if (!type)
        return 0;

    // operator[] would add a zero entry for every unknown name that is asked for,
    // so make sure the key exists before reading it.
    if (TypeSize.count(type) == 0)
        return 0;

    return TypeSize[type];
}
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// DeleteNextToken. Unlink and delete next token.
//---------------------------------------------------------------------------
// Unlink and delete the token that follows 'tok'.
// Nothing is done if 'tok' is null or is the last token in the list.
void Tokenizer::DeleteNextToken(TOKEN *tok)
{
    if (!tok || !tok->next)
        return;

    TOKEN *next = tok->next;
    tok->next = next->next;
    delete next;
}
//---------------------------------------------------------------------------
2007-05-29 08:24:36 +02:00
//---------------------------------------------------------------------------
// InsertTokens - Copy and insert tokens
//---------------------------------------------------------------------------
// Copy up to 'n' tokens, starting at 'src', and insert the copies after 'dest'
// in the same order. Text, file index and line number are copied.
// Copying stops early if the source list ends before 'n' tokens were copied,
// and nothing is done when 'dest' is null.
void Tokenizer::InsertTokens(TOKEN *dest, TOKEN *src, unsigned int n)
{
    while (n > 0 && dest && src)
    {
        TOKEN *NewToken = new TOKEN;
        NewToken->FileIndex = src->FileIndex;
        NewToken->linenr = src->linenr;
        NewToken->setstr(src->str);

        // Link the copy in directly after 'dest', then continue after the copy
        NewToken->next = dest->next;
        dest->next = NewToken;
        dest = dest->next;

        src = src->next;
        n--;
    }
}
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// Tokenize - tokenizes a given file.
//---------------------------------------------------------------------------
void Tokenizer::Tokenize(std::istream &code, const char FileName[])
2007-05-24 07:40:45 +02:00
{
// Has this file been tokenized already?
for (unsigned int i = 0; i < Files.size(); i++)
{
if ( SameFileName( Files[i].c_str(), FileName ) )
2007-05-24 07:40:45 +02:00
return;
}
// The "Files" vector remembers what files have been tokenized..
2007-05-24 07:40:45 +02:00
Files.push_back(FileName);
2008-02-16 16:46:32 +01:00
// Tokenize the file..
TokenizeCode( code, (unsigned int)(Files.size() - 1) );
2008-02-16 16:46:32 +01:00
}
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// Tokenize - tokenizes input stream
//---------------------------------------------------------------------------
void Tokenizer::TokenizeCode(std::istream &code, const unsigned int FileIndex)
2008-02-16 16:46:32 +01:00
{
// Tokenize the file.
2007-05-24 07:40:45 +02:00
unsigned int lineno = 1;
std::string CurrentToken;
for (char ch = (char)code.get(); code.good(); ch = (char)code.get())
2007-05-24 07:40:45 +02:00
{
// Todo
if ( ch < 0 )
continue;
// Preprocessor stuff?
if (ch == '#' && CurrentToken.empty())
2007-05-24 07:40:45 +02:00
{
std::string line("#");
{
char chPrev = '#';
while ( code.good() )
{
ch = (char)code.get();
if (chPrev!='\\' && ch=='\n')
break;
if (ch!=' ')
chPrev = ch;
if (ch!='\\' && ch!='\n')
line += ch;
if (ch=='\n')
++lineno;
}
}
2007-05-24 07:40:45 +02:00
if (strncmp(line.c_str(),"#include",8)==0 &&
line.find("\"") != std::string::npos)
{
// Extract the filename
line.erase(0, line.find("\"")+1);
line.erase(line.find("\""));
// Relative path..
2008-02-16 16:46:32 +01:00
if (Files.back().find_first_of("\\/") != std::string::npos)
2007-05-24 07:40:45 +02:00
{
2008-02-16 16:46:32 +01:00
std::string path = Files.back();
path.erase( 1 + path.find_last_of("\\/") );
2007-05-24 07:40:45 +02:00
line = path + line;
}
2008-02-16 16:46:32 +01:00
addtoken("#include", lineno, FileIndex);
addtoken(line.c_str(), lineno, FileIndex);
2007-05-24 07:40:45 +02:00
std::ifstream fin( line.c_str() );
Tokenize(fin, line.c_str());
2007-05-24 07:40:45 +02:00
}
else if (strncmp(line.c_str(), "#define", 7) == 0)
{
std::string strId;
2007-05-24 07:40:45 +02:00
enum {Space1, Id, Space2, Value} State;
State = Space1;
for (unsigned int i = 8; i < line.length(); i++)
{
if (State==Space1 || State==Space2)
{
if (isspace(line[i]))
2007-05-24 07:40:45 +02:00
continue;
State = (State==Space1) ? Id : Value;
}
else if (State==Id)
2007-05-24 07:40:45 +02:00
{
if ( isspace( line[i] ) )
{
strId = CurrentToken;
CurrentToken.clear();
State = Space2;
continue;
}
else if ( ! isalnum(line[i]) )
{
break;
}
2007-05-24 07:40:45 +02:00
}
CurrentToken += line[i];
2007-05-24 07:40:45 +02:00
}
if (State==Value)
{
2008-02-16 16:46:32 +01:00
addtoken("def", lineno, FileIndex);
addtoken(strId.c_str(), lineno, FileIndex);
2008-02-16 16:46:32 +01:00
addtoken(";", lineno, FileIndex);
Define(strId.c_str(), CurrentToken.c_str());
2007-05-24 07:40:45 +02:00
}
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
}
else
{
2008-02-16 16:46:32 +01:00
addtoken("#", lineno, FileIndex);
addtoken(";", lineno, FileIndex);
}
2007-05-24 07:40:45 +02:00
lineno++;
continue;
}
if (ch == '\n')
{
// Add current token..
addtoken(CurrentToken.c_str(), lineno++, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
continue;
}
// Comments..
if (ch == '/' && code.good())
2007-05-24 07:40:45 +02:00
{
bool newstatement = bool( strchr(";{}", CurrentToken.empty() ? '\0' : CurrentToken[0]) != NULL );
2007-05-24 07:40:45 +02:00
// Add current token..
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
// Read next character..
2008-02-16 16:46:32 +01:00
ch = (char)code.get();
2007-05-24 07:40:45 +02:00
// If '//'..
if (ch == '/')
{
std::string comment;
getline( code, comment ); // Parse in the whole comment
// If the comment says something like "fred is deleted" then generate appropriate tokens for that
comment = comment + " ";
if ( newstatement && comment.find(" deleted ")!=std::string::npos )
{
// delete
addtoken( "delete", lineno, FileIndex );
// fred
std::string::size_type pos1 = comment.find_first_not_of(" \t");
std::string::size_type pos2 = comment.find(" ", pos1);
std::string firstWord = comment.substr( pos1, pos2-pos1 );
addtoken( firstWord.c_str(), lineno, FileIndex );
// ;
addtoken( ";", lineno, FileIndex );
}
2007-05-24 07:40:45 +02:00
lineno++;
continue;
}
// If '/*'..
if (ch == '*')
{
char chPrev;
ch = chPrev = 'A';
while (code.good() && (chPrev!='*' || ch!='/'))
2007-05-24 07:40:45 +02:00
{
chPrev = ch;
2008-02-16 16:46:32 +01:00
ch = (char)code.get();
2007-05-24 07:40:45 +02:00
if (ch == '\n')
lineno++;
}
continue;
}
// Not a comment.. add token..
2008-02-16 16:46:32 +01:00
addtoken("/", lineno, FileIndex);
2007-05-24 07:40:45 +02:00
}
// char..
if (ch == '\'')
{
// Add previous token
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
// Read this ..
CurrentToken += ch;
CurrentToken += (char)code.get();
CurrentToken += (char)code.get();
2007-05-24 07:40:45 +02:00
if (CurrentToken[1] == '\\')
CurrentToken += (char)code.get();
2007-05-24 07:40:45 +02:00
// Add token and start on next..
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
continue;
}
// String..
if (ch == '\"')
{
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
bool special = false;
char c = ch;
do
{
// Append token..
CurrentToken += c;
2007-05-24 07:40:45 +02:00
// Special sequence '\.'
if (special)
special = false;
else
special = (c == '\\');
// Get next character
2008-02-16 16:46:32 +01:00
c = (char)code.get();
2007-05-24 07:40:45 +02:00
}
while (code.good() && (special || c != '\"'));
CurrentToken += '\"';
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
continue;
}
if (strchr("+-*/%&|^?!=<>[](){};:,.",ch))
{
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
CurrentToken += ch;
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
continue;
}
if (isspace(ch) || iscntrl(ch))
2007-05-24 07:40:45 +02:00
{
addtoken(CurrentToken.c_str(), lineno, FileIndex);
CurrentToken.clear();
2007-05-24 07:40:45 +02:00
continue;
}
CurrentToken += ch;
2007-05-24 07:40:45 +02:00
}
addtoken( CurrentToken.c_str(), lineno, FileIndex );
2007-05-24 07:40:45 +02:00
// Combine tokens..
for (TOKEN *tok = _tokens; tok && tok->next; tok = tok->next)
2007-05-24 07:40:45 +02:00
{
combine_2tokens(tok, "<", "<");
combine_2tokens(tok, ">", ">");
combine_2tokens(tok, "&", "&");
combine_2tokens(tok, "|", "|");
combine_2tokens(tok, "+", "=");
combine_2tokens(tok, "-", "=");
combine_2tokens(tok, "*", "=");
combine_2tokens(tok, "/", "=");
combine_2tokens(tok, "&", "=");
combine_2tokens(tok, "|", "=");
combine_2tokens(tok, "=", "=");
combine_2tokens(tok, "!", "=");
combine_2tokens(tok, "<", "=");
combine_2tokens(tok, ">", "=");
combine_2tokens(tok, ":", ":");
combine_2tokens(tok, "-", ">");
combine_2tokens(tok, "private", ":");
combine_2tokens(tok, "protected", ":");
combine_2tokens(tok, "public", ":");
}
// Replace "->" with "."
for ( TOKEN *tok = _tokens; tok; tok = tok->next )
{
if ( strcmp(tok->str, "->") == 0 )
{
tok->setstr(".");
}
}
2008-08-28 08:37:11 +02:00
// typedef..
for ( TOKEN *tok = _tokens; tok; tok = tok->next )
2008-08-28 08:37:11 +02:00
{
if (Tokenizer::Match(tok, "typedef %type% %type% ;"))
2008-08-28 08:37:11 +02:00
{
const char *type1 = getstr(tok, 1);
const char *type2 = getstr(tok, 2);
for ( TOKEN *tok2 = tok; tok2; tok2 = tok2->next )
{
2008-09-11 20:37:36 +02:00
if (tok2->str!=type1 && tok2->str!=type2 && strcmp(tok2->str,type2)==0)
2008-08-28 08:37:11 +02:00
{
tok2->setstr(type1);
2008-08-28 08:37:11 +02:00
}
}
}
else if (Tokenizer::Match(tok, "typedef %type% %type% %type% ;"))
2008-08-28 08:37:11 +02:00
{
const char *type1 = getstr(tok, 1);
const char *type2 = getstr(tok, 2);
const char *type3 = getstr(tok, 3);
TOKEN *tok2 = tok;
while ( ! Tokenizer::Match(tok2, ";") )
tok2 = tok2->next;
for ( ; tok2; tok2 = tok2->next )
2008-08-28 08:37:11 +02:00
{
if (tok2->str!=type3 && strcmp(tok2->str,type3)==0)
{
tok2->setstr(type1);
2008-08-28 08:37:11 +02:00
TOKEN *newtok = new TOKEN;
newtok->setstr(type2);
2008-08-28 08:37:11 +02:00
newtok->FileIndex = tok2->FileIndex;
newtok->linenr = tok2->linenr;
newtok->next = tok2->next;
tok2->next = newtok;
tok2 = newtok;
2008-08-28 08:37:11 +02:00
}
}
}
}
// Remove __asm..
for ( TOKEN *tok = _tokens; tok; tok = tok->next )
{
if ( Tokenizer::Match(tok->next, "__asm {") )
{
while ( tok->next )
{
bool last = Tokenizer::Match( tok->next, "}" );
// Unlink and delete tok->next
TOKEN *next = tok->next;
tok->next = tok->next->next;
delete next;
// break if this was the last token to delete..
if (last)
break;
}
}
}
2007-05-29 08:24:36 +02:00
}
//---------------------------------------------------------------------------
2007-05-24 07:40:45 +02:00
2007-05-29 08:24:36 +02:00
//---------------------------------------------------------------------------
// Simplify token list
//---------------------------------------------------------------------------
// Rewrites the token list in place. The passes below run in this order:
//   - remove the keyword 'unsigned' (wherever it follows another token)
//   - replace the names of numeric constants with their value
//   - fill the map TypeSize and replace 'sizeof(..)' expressions with numbers
//   - fold simple calculations on numbers
//   - replace "*(str + num)" with "str[num]"
//   - split up variable declarations
//   - replace NULL with 0 and remove pointer casts of 0
void Tokenizer::SimplifyTokenList()
2007-05-29 08:24:36 +02:00
{
// Remove the keyword 'unsigned'
for ( TOKEN *tok = _tokens; tok; tok = tok->next )
{
if (tok->next && strcmp(tok->next->str,"unsigned")==0)
{
DeleteNextToken( tok );
}
}
2007-05-24 07:40:45 +02:00
// Replace constants..
for (TOKEN *tok = _tokens; tok; tok = tok->next)
2007-05-24 07:40:45 +02:00
{
if (Tokenizer::Match(tok,"const %type% %var% = %num% ;"))
2007-05-24 07:40:45 +02:00
{
const char *sym = getstr(tok,2);
const char *num = getstr(tok,4);
2008-03-22 12:46:06 +01:00
// Every later token with the same text as the constant's name gets the number instead
for (TOKEN *tok2 = _gettok(tok,6); tok2; tok2 = tok2->next)
2007-05-24 07:40:45 +02:00
{
if (strcmp(tok2->str,sym) == 0)
{
tok2->setstr(num);
2007-05-24 07:40:45 +02:00
}
}
}
}
// Fill the map TypeSize..
TypeSize.clear();
TypeSize["char"] = sizeof(char);
TypeSize["short"] = sizeof(short);
TypeSize["int"] = sizeof(int);
TypeSize["long"] = sizeof(long);
TypeSize["float"] = sizeof(float);
TypeSize["double"] = sizeof(double);
// Every class name is given the size 11 and every struct name the size 13
for (TOKEN *tok = _tokens; tok; tok = tok->next)
{
if (Tokenizer::Match(tok,"class %var%"))
{
TypeSize[getstr(tok,1)] = 11;
}
else if (Tokenizer::Match(tok, "struct %var%"))
{
TypeSize[getstr(tok,1)] = 13;
}
}
// Replace 'sizeof(type)'..
for (TOKEN *tok = _tokens; tok; tok = tok->next)
{
if (strcmp(tok->str,"sizeof") != 0)
continue;
if (Tokenizer::Match(tok, "sizeof ( %type% * )"))
{
std::ostringstream str;
// 'sizeof(type *)' has the same size as 'sizeof(char *)'
str << sizeof(char *);
tok->setstr( str.str().c_str() );
2007-05-24 07:40:45 +02:00
// Remove the four tokens "( type * )"
for (int i = 0; i < 4; i++)
{
DeleteNextToken(tok);
}
}
else if (Tokenizer::Match(tok, "sizeof ( %type% )"))
{
const char *type = getstr(tok, 2);
int size = SizeOfType(type);
// The expression is left untouched when no size is known for the type
if (size > 0)
{
std::ostringstream str;
str << size;
tok->setstr( str.str().c_str() );
for (int i = 0; i < 3; i++)
{
DeleteNextToken(tok);
}
}
}
else if (Tokenizer::Match(tok, "sizeof ( * %var% )"))
{
// 'sizeof(*var)' is always replaced with the fixed value 100
tok->setstr("100");
for ( int i = 0; i < 4; ++i )
DeleteNextToken(tok);
}
2007-05-24 07:40:45 +02:00
}
// Replace 'sizeof(var)'
for (TOKEN *tok = _tokens; tok; tok = tok->next)
{
// type array [ num ] ;
if ( ! Tokenizer::Match(tok, "%type% %var% [ %num% ] ;") )
continue;
int size = SizeOfType(tok->str);
if (size <= 0)
continue;
const char *varname = getstr(tok, 1);
int total_size = size * atoi( getstr(tok, 3) );
// Replace 'sizeof(var)' with number
// The search starts after the declaration and stops at the '}' that makes
// the indent level negative
int indentlevel = 0;
2008-03-22 12:46:06 +01:00
for ( TOKEN *tok2 = _gettok(tok,5); tok2; tok2 = tok2->next )
{
if (tok2->str[0] == '{')
{
indentlevel++;
}
else if (tok2->str[0] == '}')
{
indentlevel--;
if (indentlevel < 0)
break;
}
// Todo: Tokenizer::Match varname directly
else if (Tokenizer::Match(tok2, "sizeof ( %var% )"))
{
if (strcmp(getstr(tok2,2), varname) == 0)
{
std::ostringstream str;
str << total_size;
tok2->setstr(str.str().c_str());
// Delete the other tokens..
for (int i = 0; i < 3; i++)
{
DeleteNextToken(tok2);
}
}
}
}
}
2007-05-29 08:24:36 +02:00
// Simple calculations..
// The whole list is scanned again until a pass makes no change
bool done = false;
while (!done)
{
done = true;
for (TOKEN *tok = _tokens; tok; tok = tok->next)
{
// Remove "* 1" and "1 *"
if (Tokenizer::Match(tok->next, "* 1") || Tokenizer::Match(tok->next, "1 *"))
{
for (int i = 0; i < 2; i++)
DeleteNextToken(tok);
done = false;
}
// (1-2)
// Pattern: one of "[,(=<>", number, operator, number, one of "],);=<>"
if (strchr("[,(=<>",tok->str[0]) &&
Tokenizer::IsNumber(getstr(tok,1)) &&
strchr("+-*/",*(getstr(tok,2))) &&
Tokenizer::IsNumber(getstr(tok,3)) &&
strchr("],);=<>",*(getstr(tok,4))) )
{
int i1 = atoi(getstr(tok,1));
int i2 = atoi(getstr(tok,3));
// Leave a division by zero as it is
if ( i2 == 0 && *(getstr(tok,2)) == '/' )
{
continue;
}
switch (*(getstr(tok,2)))
{
case '+': i1 += i2; break;
case '-': i1 -= i2; break;
case '*': i1 *= i2; break;
case '/': i1 /= i2; break;
}
// The first number gets the result; the operator and the second number are removed
tok = tok->next;
std::ostringstream str;
str << i1;
tok->setstr(str.str().c_str());
for (int i = 0; i < 2; i++)
{
DeleteNextToken(tok);
}
2008-03-22 12:46:06 +01:00
done = false;
}
}
}
// Replace "*(str + num)" => "str[num]"
for (TOKEN *tok = _tokens; tok; tok = tok->next)
{
if ( ! strchr(";{}(=<>", tok->str[0]) )
continue;
TOKEN *next = tok->next;
if ( ! next )
break;
if (Tokenizer::Match(next, "* ( %var% + %num% )"))
{
// The first four of the six matched tokens are overwritten with
// "var [ num ]", the remaining two are deleted
const char *str[4] = {"var","[","num","]"};
str[0] = getstr(tok,3);
str[2] = getstr(tok,5);
for (int i = 0; i < 4; i++)
{
tok = tok->next;
tok->setstr(str[i]);
}
DeleteNextToken(tok);
DeleteNextToken(tok);
}
}
2007-05-29 08:24:36 +02:00
// Split up variable declarations if possible..
for (TOKEN *tok = _tokens; tok; tok = tok->next)
2007-05-29 08:24:36 +02:00
{
// Only look at tokens that follow '{', '}' or ';'
if ( ! strchr("{};", tok->str[0]) )
continue;
TOKEN *type0 = tok->next;
if (!Tokenizer::Match(type0, "%type%"))
continue;
if (Tokenizer::Match(type0, "else") || Tokenizer::Match(type0, "return"))
continue;
2007-05-29 08:24:36 +02:00
// tok2 is set to the ',' or '=' token of a recognized declaration,
// typelen is the number of tokens that are copied as the type
TOKEN *tok2 = NULL;
unsigned int typelen = 0;
if ( Tokenizer::Match(type0, "%type% %var% ,") )
2007-05-29 08:24:36 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 2); // The ',' token
2007-05-29 08:24:36 +02:00
typelen = 1;
}
else if ( Tokenizer::Match(type0, "%type% * %var% ,") )
2007-05-29 08:24:36 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 3); // The ',' token
2007-05-29 19:11:53 +02:00
typelen = 1;
2007-05-29 08:24:36 +02:00
}
else if ( Tokenizer::Match(type0, "%type% %var% [ %num% ] ,") )
2007-05-29 08:24:36 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 5); // The ',' token
2007-05-29 08:24:36 +02:00
typelen = 1;
}
else if ( Tokenizer::Match(type0, "%type% * %var% [ %num% ] ,") )
2007-05-29 08:24:36 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 6); // The ',' token
2007-05-29 19:11:53 +02:00
typelen = 1;
}
else if ( Tokenizer::Match(type0, "struct %type% %var% ,") )
2007-05-29 19:11:53 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 3);
2007-05-29 19:11:53 +02:00
typelen = 2;
}
else if ( Tokenizer::Match(type0, "struct %type% * %var% ,") )
2007-05-29 19:11:53 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 4);
2007-05-29 08:24:36 +02:00
typelen = 2;
}
else if ( Tokenizer::Match(type0, "%type% %var% =") )
2007-05-29 08:24:36 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 2);
2007-05-29 08:24:36 +02:00
typelen = 1;
}
else if ( Tokenizer::Match(type0, "%type% * %var% =") )
2007-05-29 08:24:36 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 3);
2007-05-29 19:11:53 +02:00
typelen = 1;
}
else if ( Tokenizer::Match(type0, "struct %type% * %var% =") )
2007-05-29 19:11:53 +02:00
{
2008-03-22 12:46:06 +01:00
tok2 = _gettok(type0, 4);
2007-05-29 08:24:36 +02:00
typelen = 2;
}
if (tok2)
{
if (tok2->str[0] == ',')
{
// "type a, b" => "type a; type b"
tok2->setstr(";");
2007-05-29 08:24:36 +02:00
InsertTokens(tok2, type0, typelen);
}
else
{
// Declaration with initialization: look for the ';' or ',' that ends
// the initializer, outside any parentheses/braces
TOKEN *eq = tok2;
int parlevel = 0;
while (tok2)
{
if ( strchr("{(", tok2->str[0]) )
{
parlevel++;
}
else if ( strchr("})", tok2->str[0]) )
{
if (parlevel<0)
break;
parlevel--;
}
else if ( parlevel==0 && strchr(";,",tok2->str[0]) )
{
// "type var =" => "type var; var ="
2008-03-22 12:46:06 +01:00
TOKEN *VarTok = _gettok(type0,typelen);
2007-05-29 19:11:53 +02:00
if (VarTok->str[0]=='*')
VarTok = VarTok->next;
InsertTokens(eq, VarTok, 2);
eq->setstr(";");
2007-05-29 08:24:36 +02:00
// "= x, " => "= x; type "
if (tok2->str[0] == ',')
{
tok2->setstr(";");
2007-05-29 08:24:36 +02:00
InsertTokens( tok2, type0, typelen );
}
break;
}
tok2 = tok2->next;
}
}
}
}
// Replace NULL with 0..
for ( TOKEN *tok = _tokens; tok; tok = tok->next )
{
if ( Tokenizer::Match(tok, "NULL") )
tok->setstr("0");
}
// Replace pointer casts of 0.. "(char *)0" => "0"
for ( TOKEN *tok = _tokens; tok; tok = tok->next )
{
if ( Tokenizer::Match(tok->next, "( %type% * ) 0") || Tokenizer::Match(tok->next,"( %type% %type% * ) 0") )
{
// Delete the tokens of the cast, up to but not including the "0"
while (!Tokenizer::Match(tok->next,"0"))
DeleteNextToken(tok);
}
2007-05-29 08:24:36 +02:00
}
2007-05-24 07:40:45 +02:00
}
//---------------------------------------------------------------------------
2007-05-29 08:24:36 +02:00
//---------------------------------------------------------------------------
// Helper functions for handling the tokens list
//---------------------------------------------------------------------------
// Search the list that starts at 'tok1' for a run of tokens whose texts are
// equal to the strings in 'tokenstr' (an array terminated by a null pointer).
// An empty string in 'tokenstr' matches any token.
// Returns the first token of the first run found, or NULL if there is none.
const TOKEN *Tokenizer::findtoken(const TOKEN *tok1, const char *tokenstr[])
{
    for (const TOKEN *start = tok1; start; start = start->next)
    {
        const TOKEN *cur = start;
        const char **pat = tokenstr;
        for ( ; *pat; ++pat, cur = cur->next)
        {
            // The list ended inside the pattern; no later start can match either
            if (cur == NULL)
                return NULL;

            const bool matchAny = ((*pat)[0] == 0);
            if (!matchAny && strcmp(*pat, cur->str) != 0)
                break;
        }

        // The whole pattern was consumed => match
        if (*pat == NULL)
            return start;
    }
    return NULL;
}
//---------------------------------------------------------------------------
// Step 'index' tokens forward from 'tok' and return the token reached.
// A null pointer is returned if the list ends first. An index of zero
// (or a negative index) returns 'tok' itself.
const TOKEN *Tokenizer::gettok(const TOKEN *tok, int index)
{
    for (int remaining = index; tok && remaining > 0; --remaining)
        tok = tok->next;
    return tok;
}
//---------------------------------------------------------------------------
const char *Tokenizer::getstr(const TOKEN *tok, int index)
{
tok = gettok(tok, index);
return tok ? tok->str : "";
}
//---------------------------------------------------------------------------
2007-05-29 08:24:36 +02:00
2007-05-29 19:11:53 +02:00
2007-05-29 08:24:36 +02:00
//---------------------------------------------------------------------------
// Look up a function in 'FunctionList' by name.
// Returns the first entry whose text equals 'funcname', or NULL if none does.
const TOKEN *Tokenizer::GetFunctionTokenByName( const char funcname[] ) const
{
    const TOKEN *found = NULL;
    for ( unsigned int idx = 0; found == NULL && idx < FunctionList.size(); ++idx )
    {
        const TOKEN *candidate = FunctionList[idx];
        if ( strcmp( candidate->str, funcname ) == 0 )
            found = candidate;
    }
    return found;
}
// Rebuild 'FunctionList' from the token list.
//
//   file_id - id stored in the GlobalFunction entries created here; when it is 0
//             'GlobalFunctions' is cleared first
//
// A function is recognized outside all braces as "name ( .. ) {". Functions whose
// name occurs more than once are removed from 'FunctionList' again.
// When the coding style check is enabled, non-static and non-class functions that
// are implemented in the file with index 0 are added to 'GlobalFunctions', and the
// names that look used inside braces ("name (", "= name ;" or "= name ,") are
// added to 'UsedGlobalFunctions', once per name.
void Tokenizer::FillFunctionList(const unsigned int file_id)
{
    FunctionList.clear();

    // Names used in this file. The pointers refer to token strings.
    std::list<const char *> _usedfunc;
    if ( file_id == 0 )
    {
        GlobalFunctions.clear();
    }

    bool staticfunc = false;
    bool classfunc = false;

    int indentlevel = 0;
    for ( const TOKEN *tok = _tokens; tok; tok = tok->next )
    {
        if ( tok->str[0] == '{' )
            indentlevel++;

        else if ( tok->str[0] == '}' )
            indentlevel--;

        // Inside braces: only collect the used function names
        if (indentlevel > 0)
        {
            if ( _settings._checkCodingStyle )
            {
                const char *funcname = 0;

                if ( Tokenizer::Match(tok,"%var% (") )
                    funcname = tok->str;
                else if ( Tokenizer::Match(tok, "= %var% ;") ||
                          Tokenizer::Match(tok, "= %var% ,") )
                    funcname = tok->next->str;

                if ( funcname )
                {
                    // Compare the names by content. Every token has its own string,
                    // so comparing the pointers would never find a duplicate.
                    bool known = false;
                    for (std::list<const char *>::const_iterator it = _usedfunc.begin();
                         it != _usedfunc.end() && !known;
                         ++it)
                    {
                        known = (strcmp(*it, funcname) == 0);
                    }

                    if ( ! known )
                        _usedfunc.push_back( funcname );
                }
            }

            continue;
        }

        if (strchr("};", tok->str[0]))
            staticfunc = classfunc = false;

        else if ( strcmp( tok->str, "static" ) == 0 )
            staticfunc = true;

        else if ( strcmp( tok->str, "::" ) == 0 )
            classfunc = true;

        else if (Tokenizer::Match(tok, "%var% ("))
        {
            // Check if this is the first token of a function implementation..
            for ( const TOKEN *tok2 = tok; tok2; tok2 = tok2->next )
            {
                if ( tok2->str[0] == ';' )
                {
                    // Declaration or statement; continue after it
                    tok = tok2;
                    break;
                }

                else if ( tok2->str[0] == '{' )
                {
                    break;
                }

                else if ( tok2->str[0] == ')' )
                {
                    if ( Tokenizer::Match(tok2, ") {") )
                    {
                        if (_settings._checkCodingStyle && !staticfunc && !classfunc && tok->FileIndex==0)
                            GlobalFunctions.push_back( GlobalFunction(file_id, tok->str) );
                        FunctionList.push_back( tok );
                        tok = tok2;
                    }
                    else
                    {
                        // Not an implementation; skip forward to just before the next ';' or '{'
                        tok = tok2;
                        while (tok->next && !strchr(";{", tok->next->str[0]))
                            tok = tok->next;
                    }
                    break;
                }
            }
        }
    }

    // If the FunctionList contains functions with duplicate names, remove all of them
    // TODO this will need some better handling
    for ( unsigned int func1 = 0; func1 < FunctionList.size(); )
    {
        bool hasDuplicates = false;
        for ( unsigned int func2 = func1 + 1; func2 < FunctionList.size(); )
        {
            if ( strcmp(FunctionList[func1]->str, FunctionList[func2]->str) == 0 )
            {
                hasDuplicates = true;
                FunctionList.erase( FunctionList.begin() + func2 );
            }
            else
            {
                ++func2;
            }
        }

        if ( ! hasDuplicates )
        {
            ++func1;
        }
        else
        {
            // The first occurrence is removed too
            FunctionList.erase( FunctionList.begin() + func1 );
        }
    }

    for (std::list<const char *>::const_iterator it = _usedfunc.begin(); it != _usedfunc.end(); ++it)
    {
        if ( *it != 0 )
        {
            UsedGlobalFunctions.push_back( GlobalFunction(file_id, *it) );
        }
    }
}
//--------------------------------------------------------------------------
void Tokenizer::CheckGlobalFunctionUsage(const std::vector<std::string> &filenames)
{
// Iterator for GlobalFunctions
std::list<GlobalFunction>::const_iterator func;
// Iterator for UsedGlobalFunctions
std::list<GlobalFunction>::const_iterator usedfunc;
unsigned int i1 = 0;
unsigned int i2 = 1;
// Check that every function in GlobalFunctions are used
for ( func = GlobalFunctions.begin(); func != GlobalFunctions.end(); func++ )
{
if ( GlobalFunctions.size() > 100 )
{
++i1;
if ( i1 > (i2 * GlobalFunctions.size()) / 100 )
{
if ( (i2 % 10) == 0 )
std::cout << i2 << "%";
else
std::cout << ".";
std::cout.flush();
++i2;
}
}
const std::string &funcname = func->name();
if ( funcname == "main" || funcname == "WinMain" )
continue;
// Check if this global function is used in any of the other files..
bool UsedOtherFile = false;
bool UsedAnyFile = false;
for ( usedfunc = UsedGlobalFunctions.begin(); usedfunc != UsedGlobalFunctions.end(); usedfunc++ )
{
if ( funcname == usedfunc->name() )
{
UsedAnyFile = true;
if (func->file_id() != usedfunc->file_id())
{
UsedOtherFile = true;
break;
}
}
}
if ( ! UsedAnyFile )
{
std::ostringstream errmsg;
errmsg << "[" << filenames[func->file_id()] << "]: "
<< "The function '" << func->name() << "' is never used.";
_errorLogger->reportErr( errmsg.str() );
}
else if ( ! UsedOtherFile )
{
std::ostringstream errmsg;
errmsg << "[" << filenames[func->file_id()] << "]: "
<< "The linkage of the function '" << func->name() << "' can be local (static) instead of global";
_errorLogger->reportErr( errmsg.str() );
}
}
std::cout << "\n";
}
//---------------------------------------------------------------------------
2007-05-29 08:24:36 +02:00
void Tokenizer::settings( const Settings &settings )
{
_settings = settings;
}
// Deallocate lists..
void Tokenizer::DeallocateTokens()
{
deleteTokens( _tokens );
_tokens = 0;
tokens_back = 0;
while (dsymlist)
{
struct DefineSymbol *next = dsymlist->next;
free(dsymlist->name);
free(dsymlist->value);
delete dsymlist;
dsymlist = next;
}
Files.clear();
}
// Delete 'tok' and every token after it in the list.
void Tokenizer::deleteTokens(TOKEN *tok)
{
    for (TOKEN *following = 0; tok; tok = following)
    {
        // Remember the rest of the list before this token is destroyed
        following = tok->next;
        delete tok;
    }
}
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// Find the name of parameter number 'par' (1 = first) in the tokens from 'ftok'.
// The parameter number is counted up at every "," token. The name is the first
// token, at the wanted number, that is a name directly followed by ',' or ')'.
// Returns NULL if no such token is found.
const char *Tokenizer::getParameterName( const TOKEN *ftok, int par )
{
    int current = 1;
    while ( ftok )
    {
        if ( Tokenizer::Match(ftok, ",") )
            current++;
        if ( current == par && Tokenizer::Match(ftok, "%var% [,)]") )
            return ftok->str;
        ftok = ftok->next;
    }
    return NULL;
}
//---------------------------------------------------------------------------
// Return the first token, starting at 'tok', where Match() succeeds for the
// given pattern and variable names. Returns 0 if there is no such token.
const TOKEN *Tokenizer::findmatch(const TOKEN *tok, const char pattern[], const char *varname1[], const char *varname2[])
{
    while ( tok && ! Tokenizer::Match(tok, pattern, varname1, varname2) )
        tok = tok->next;
    return tok;
}
//---------------------------------------------------------------------------
// Format the location of 'tok' as "[file:line]".
// The file name is looked up with Files.at(), which throws for a bad file index.
std::string Tokenizer::fileLine( const TOKEN *tok )
{
    std::ostringstream location;
    location << "[";
    location << Files.at(tok->FileIndex);
    location << ":";
    location << tok->linenr;
    location << "]";
    return location.str();
}
// Check if the tokens starting at 'tok' match 'pattern'.
//
// The pattern is a list of words separated by spaces. Each word is compared with
// one token:
//   %var% / %type%   - any name (see IsName)
//   %var1% / %var2%  - the variable given by 'varname1' / 'varname2'. These are
//                      null-terminated arrays; {"a","b",0} matches the tokens "a . b".
//                      The match fails if the array pointer is null.
//   %num%            - any number (see IsNumber)
//   %str%            - any token that starts with a double quote
//   [abc]            - a one-character token that is one of the listed characters
//   anything else    - a token with exactly that text
//
// Returns true when every word matched, false otherwise (also when 'tok' is null
// or the token list ends before the pattern does).
bool Tokenizer::Match(const TOKEN *tok, const char pattern[], const char *varname1[], const char *varname2[])
{
    if (!tok)
        return false;

    const char *p = pattern;
    while (*p)
    {
        // Skip spaces in pattern..
        while ( *p == ' ' )
            p++;

        // Extract token from pattern..
        // (a std::string is used so a long pattern word can't overflow a fixed buffer)
        const char *wordStart = p;
        while (*p && *p!=' ')
            p++;
        std::string str(wordStart, p - wordStart);

        // No token => Success!
        if (str.empty())
            return true;

        // Any symbolname..
        if (str == "%var%" || str == "%type%")
        {
            if (!Tokenizer::IsName(tok->str))
                return false;
        }

        // Variable name..
        else if (str == "%var1%" || str == "%var2%")
        {
            const char **varname = (str == "%var1%") ? varname1 : varname2;

            if ( ! varname )
                return false;

            if (strcmp(tok->str, varname[0]) != 0)
                return false;

            // Each further part of the name must follow as ". part"
            for ( int i = 1; varname[i]; i++ )
            {
                if ( ! Tokenizer::gettok(tok, 2) )
                    return false;

                if ( strcmp(Tokenizer::getstr(tok, 1), ".") )
                    return false;

                if ( strcmp(Tokenizer::getstr(tok, 2), varname[i]) )
                    return false;

                tok = Tokenizer::gettok(tok, 2);
            }
        }

        else if (str == "%num%")
        {
            if ( ! Tokenizer::IsNumber(tok->str) )
                return false;
        }

        else if (str == "%str%")
        {
            if ( tok->str[0] != '\"' )
                return false;
        }

        // [.. => search for a one-character token..
        else if (str[0]=='[' && str.find(']') != std::string::npos && tok->str[1] == 0)
        {
            // Cut the word at the last ']', what remains after the '[' is the character set
            str.erase(str.rfind(']'));
            if ( strchr( str.c_str() + 1, tok->str[0] ) == 0 )
                return false;
        }

        else if (strcmp(str.c_str(), tok->str) != 0)
            return false;

        tok = tok->next;
        if (!tok && *p)
            return false;
    }

    // The end of the pattern has been reached and nothing wrong has been found
    return true;
}
//---------------------------------------------------------------------------
// Compare two file names. The comparison is case sensitive on Linux and case
// insensitive with the other known compilers. An unknown compiler falls back
// to the case sensitive comparison, so a value is returned on every build.
bool Tokenizer::SameFileName( const char fname1[], const char fname2[] )
{
#if defined(__linux__)
    return bool( strcmp(fname1, fname2) == 0 );
#elif defined(__GNUC__)
    return bool( strcasecmp(fname1, fname2) == 0 );
#elif defined(__BORLANDC__)
    return bool( stricmp(fname1, fname2) == 0 );
#elif defined(_MSC_VER)
    return bool( _stricmp(fname1, fname2) == 0 );
#else
    return bool( strcmp(fname1, fname2) == 0 );
#endif
}
// True if 'str' starts like a name: with '_' or with a letter.
// Only the first character is checked.
bool Tokenizer::IsName(const char str[])
{
    // isalpha() must not be given a negative value, hence the cast
    return bool(str[0]=='_' || isalpha((unsigned char)str[0]));
}
//---------------------------------------------------------------------------
// True if 'str' starts with a digit. Only the first character is checked.
bool Tokenizer::IsNumber(const char str[])
{
    // isdigit() must not be given a negative value, hence the cast
    return bool(isdigit((unsigned char)str[0]) != 0);
}
//---------------------------------------------------------------------------
// True if 'str' is one of: bool, char, short, int, long, float, double.
// A null pointer gives false.
bool Tokenizer::IsStandardType(const char str[])
{
    if (str == NULL)
        return false;

    const char * const names[] = {"bool","char","short","int","long","float","double"};
    const unsigned int count = sizeof(names) / sizeof(names[0]);
    for (unsigned int idx = 0; idx < count; ++idx)
    {
        if (strcmp(str, names[idx]) == 0)
            return true;
    }
    return false;
}
//---------------------------------------------------------------------------
// True if the tokens at 'tok' are "(" followed by "1" or "true", followed by
// one of "|", ")" or "||".
bool Tokenizer::alwaysTrue( const TOKEN *tok )
{
    if ( Match(tok, "( 1 [|)]") || Match(tok, "( 1 ||") )
        return true;
    if ( Match(tok, "( true [|)]") || Match(tok, "( true ||") )
        return true;
    return false;
}
//---------------------------------------------------------------------------
// True if the tokens at 'tok' are "(" followed by "0" or "false", followed by
// one of "&", ")" or "&&".
bool Tokenizer::alwaysFalse( const TOKEN *tok )
{
    if ( Match(tok, "( 0 [&)]") || Match(tok, "( 0 &&") )
        return true;
    if ( Match(tok, "( false [&)]") || Match(tok, "( false &&") )
        return true;
    return false;
}
//---------------------------------------------------------------------------