tokenize: Added files

Daniel Marjamäki 2007-05-24 05:40:45 +00:00
parent 442ed0fa00
commit ae4c97f814
4 changed files with 441 additions and 309 deletions

Borland C++ Builder project file

@@ -4,7 +4,7 @@
 <MACROS>
 <VERSION value="BCB.06.00"/>
 <PROJECT value="cppcheck.exe"/>
-<OBJFILES value="main.obj"/>
+<OBJFILES value="main.obj tokenize.obj"/>
 <RESFILES value="cppcheck.res"/>
 <DEFFILE value=""/>
 <RESDEPEN value="$(RESFILES)"/>
@@ -53,6 +53,7 @@
 <FILE FILENAME="cppcheck.res" FORMNAME="" UNITNAME="cppcheck.res" CONTAINERID="ResTool" DESIGNCLASS="" LOCALCOMMAND=""/>
 <FILE FILENAME="cppcheck.bpf" FORMNAME="" UNITNAME="cppcheck" CONTAINERID="BPF" DESIGNCLASS="" LOCALCOMMAND=""/>
 <FILE FILENAME="main.cpp" FORMNAME="" UNITNAME="main" CONTAINERID="CCompiler" DESIGNCLASS="" LOCALCOMMAND=""/>
+<FILE FILENAME="tokenize.cpp" FORMNAME="" UNITNAME="tokenize" CONTAINERID="CCompiler" DESIGNCLASS="" LOCALCOMMAND=""/>
 </FILELIST>
 <BUILDTOOLS>
 </BUILDTOOLS>
@@ -105,7 +106,7 @@ Item0=_DEBUG
 DebugSourceDirs=$(BCB)\source\vcl
 [Parameters]
-RunParams=testdelete1\testdelete1.cpp
+RunParams=testbufferoverrun5\testbufferoverrun5.cpp
 Launcher=
 UseLauncher=0
 DebugCWD=

main.cpp

@@ -13,21 +13,12 @@
 #include <stdlib.h>
 #include <string.h>
+#include "tokenize.h" // <- Tokenizer
 //---------------------------------------------------------------------------
-std::vector<std::string> Files;
 static bool Debug = false;
 static bool ShowWarnings = false;
 //---------------------------------------------------------------------------
-struct TOKEN
-{
-unsigned int FileIndex;
-char *str;
-unsigned int linenr;
-struct TOKEN *next;
-};
-struct TOKEN *tokens, *tokens_back;
-void Tokenize(const char FileName[]);
-//---------------------------------------------------------------------------
 std::vector<std::string> VariableNames;
 struct STATEMENT
 {
@@ -188,301 +179,6 @@ static void CppCheck(const char FileName[])
 }
 //---------------------------------------------------------------------------
void addtoken(const char str[], const unsigned int lineno, const unsigned int fileno)
{
if (str[0] == 0)
return;
// Replace hexadecimal value with decimal
char str2[50];
memset(str2, 0, sizeof(str2));
if (strncmp(str,"0x",2)==0)
{
unsigned int value = strtoul(str+2, NULL, 16);
itoa(value, str2, 10);
}
TOKEN *newtoken = new TOKEN;
memset(newtoken, 0, sizeof(TOKEN));
newtoken->str = strdup(str2[0] ? str2 : str);
newtoken->linenr = lineno;
newtoken->FileIndex = fileno;
if (tokens_back)
{
tokens_back->next = newtoken;
tokens_back = newtoken;
}
else
{
tokens = tokens_back = newtoken;
}
}
//---------------------------------------------------------------------------
void combine_2tokens(TOKEN *tok, const char str1[], const char str2[])
{
if (!(tok && tok->next))
return;
if (strcmp(tok->str,str1) || strcmp(tok->next->str,str2))
return;
free(tok->str);
free(tok->next->str);
tok->str = (char *)malloc(strlen(str1)+strlen(str2)+1);
strcpy(tok->str, str1);
strcat(tok->str, str2);
TOKEN *toknext = tok->next;
tok->next = toknext->next;
delete toknext;
}
//---------------------------------------------------------------------------
static bool match(TOKEN *tok, const std::string pattern);
TOKEN *gettok(TOKEN *tok, int index);
const char *getstr(TOKEN *tok, int index);
void Tokenize(const char FileName[])
{
// Has this file been tokenized already?
for (unsigned int i = 0; i < Files.size(); i++)
{
if ( stricmp(Files[i].c_str(), FileName) == 0 )
return;
}
std::ifstream fin(FileName);
if (!fin.is_open())
return;
unsigned int CurrentFile = Files.size();
Files.push_back(FileName);
unsigned int lineno = 1;
char CurrentToken[1000];
memset(CurrentToken, 0, sizeof(CurrentToken));
char *pToken = CurrentToken;
for (char ch = (char)fin.get(); !fin.eof(); ch = (char)fin.get())
{
if (ch == '#' && !CurrentToken[0])
{
std::string line;
getline(fin,line);
line = "#" + line;
if (strncmp(line.c_str(),"#include",8)==0 &&
line.find("\"") != std::string::npos)
{
// Extract the filename
line.erase(0, line.find("\"")+1);
line.erase(line.find("\""));
}
// Relative path..
if (strchr(FileName,'\\'))
{
char path[1000];
memset(path,0,sizeof(path));
const char *p = strrchr(FileName, '\\');
memcpy(path, FileName, p-FileName+1);
line = path + line;
}
addtoken("#include", lineno, CurrentFile);
addtoken(line.c_str(), lineno, CurrentFile);
Tokenize(line.c_str());
lineno++;
continue;
}
if (ch == '\n')
{
// Add current token..
addtoken(CurrentToken, lineno++, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
// Comments..
if (ch == '/' && !fin.eof())
{
// Add current token..
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
// Read next character..
ch = (char)fin.get();
// If '//'..
if (ch == '/')
{
while (!fin.eof() && (char)fin.get()!='\n');
lineno++;
continue;
}
// If '/*'..
if (ch == '*')
{
char chPrev;
ch = chPrev = 'A';
while (!fin.eof() && (chPrev!='*' || ch!='/'))
{
chPrev = ch;
ch = (char)fin.get();
if (ch == '\n')
lineno++;
}
continue;
}
// Not a comment.. add token..
addtoken("/", lineno, CurrentFile);
}
// char..
if (ch == '\'')
{
// Add previous token
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
// Read this ..
CurrentToken[0] = ch;
CurrentToken[1] = (char)fin.get();
CurrentToken[2] = (char)fin.get();
if (CurrentToken[1] == '\\')
CurrentToken[3] = (char)fin.get();
// Add token and start on next..
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
// String..
if (ch == '\"')
{
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
bool special = false;
char c = ch;
do
{
// Append token..
*pToken = c;
pToken++;
// Special sequence '\.'
if (special)
special = false;
else
special = (c == '\\');
// Get next character
c = (char)fin.get();
}
while (special || c != '\"');
*pToken = '\"';
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
if (strchr("+-*/%&|^?!=<>[](){};:,.",ch))
{
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
CurrentToken[0] = ch;
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
if (std::isspace(ch) || std::iscntrl(ch))
{
addtoken(CurrentToken, lineno, CurrentFile);
pToken = CurrentToken;
memset(CurrentToken, 0, sizeof(CurrentToken));
continue;
}
*pToken = ch;
pToken++;
}
// Combine tokens..
for (TOKEN *tok = tokens; tok && tok->next; tok = tok->next)
{
combine_2tokens(tok, "<", "<");
combine_2tokens(tok, ">", ">");
combine_2tokens(tok, "&", "&");
combine_2tokens(tok, "|", "|");
combine_2tokens(tok, "+", "=");
combine_2tokens(tok, "-", "=");
combine_2tokens(tok, "*", "=");
combine_2tokens(tok, "/", "=");
combine_2tokens(tok, "&", "=");
combine_2tokens(tok, "|", "=");
combine_2tokens(tok, "=", "=");
combine_2tokens(tok, "!", "=");
combine_2tokens(tok, "<", "=");
combine_2tokens(tok, ">", "=");
combine_2tokens(tok, ":", ":");
combine_2tokens(tok, "-", ">");
combine_2tokens(tok, "private", ":");
combine_2tokens(tok, "protected", ":");
combine_2tokens(tok, "public", ":");
}
// Replace constants..
for (TOKEN *tok = tokens; tok; tok = tok->next)
{
if (strcmp(tok->str,"const"))
continue;
const char *sym=NULL, *num=NULL;
if (match(tok,"const int var = num ;"))
{
sym = getstr(tok,2);
num = getstr(tok,4);
}
else if (match(tok,"const unsigned int var = num ;"))
{
sym = getstr(tok,3);
num = getstr(tok,5);
}
if (sym && num)
{
for (TOKEN *tok2 = gettok(tok,6); tok2; tok2 = tok2->next)
{
if (strcmp(tok2->str,sym) == 0)
{
free(tok2->str);
tok2->str = strdup(num);
}
}
}
}
}
//---------------------------------------------------------------------------
@@ -526,7 +222,7 @@ TOKEN *findtoken(TOKEN *tok1, const char *tokenstr[])
 return NULL;
 }
-static bool match(TOKEN *tok, const std::string pattern)
+bool match(TOKEN *tok, const std::string pattern)
 {
 if (!tok)
 return false;

tokenize.cpp (new file)

@@ -0,0 +1,410 @@
//---------------------------------------------------------------------------
#include "tokenize.h"
//---------------------------------------------------------------------------
#include <locale>
#include <fstream>
#include <stdlib.h> // <- strtoul
std::vector<std::string> Files;
struct TOKEN *tokens, *tokens_back;
// These functions are in "main.cpp"
bool match(struct TOKEN *tok, const std::string pattern);
TOKEN *gettok(struct TOKEN *tok, int index);
const char *getstr(struct TOKEN *tok, int index);
struct DefineSymbol
{
char *name;
char *value;
struct DefineSymbol *next;
};
static struct DefineSymbol * dsymlist;
static void Define(const char Name[], const char Value[])
{
if (!(Name && Name[0]))
return;
if (!(Value && Value[0]))
return;
// Is 'Value' a decimal value..
bool dec = true, hex = true;
for (int i = 0; Value[i]; i++)
{
if ( ! std::isdigit(Value[i]) )
dec = false;
if ( ! std::isxdigit(Value[i]) && (!(i==1 && Value[i]=='x')))
hex = false;
}
if (!dec && !hex)
return;
char *strValue = strdup(Value);
if (!dec && hex)
{
char str[50];
unsigned long value = strtoul(Value+2, NULL, 16);
free(strValue);
strValue = strdup(itoa(value, str, 10));
}
DefineSymbol *NewSym = new DefineSymbol;
memset(NewSym, 0, sizeof(DefineSymbol));
NewSym->name = strdup(Name);
NewSym->value = strValue;
NewSym->next = dsymlist;
dsymlist = NewSym;
}
static void addtoken(const char str[], const unsigned int lineno, const unsigned int fileno)
{
if (str[0] == 0)
return;
// Replace hexadecimal value with decimal
char str2[50];
memset(str2, 0, sizeof(str2));
if (strncmp(str,"0x",2)==0)
{
unsigned int value = strtoul(str+2, NULL, 16);
itoa(value, str2, 10);
}
TOKEN *newtoken = new TOKEN;
memset(newtoken, 0, sizeof(TOKEN));
newtoken->str = strdup(str2[0] ? str2 : str);
newtoken->linenr = lineno;
newtoken->FileIndex = fileno;
if (tokens_back)
{
tokens_back->next = newtoken;
tokens_back = newtoken;
}
else
{
tokens = tokens_back = newtoken;
}
// Check if str is defined..
for (DefineSymbol *sym = dsymlist; sym; sym = sym->next)
{
if (strcmp(str,sym->name)==0)
{
free(newtoken->str);
newtoken->str = strdup(sym->value);
break;
}
}
}
//---------------------------------------------------------------------------
static void combine_2tokens(TOKEN *tok, const char str1[], const char str2[])
{
if (!(tok && tok->next))
return;
if (strcmp(tok->str,str1) || strcmp(tok->next->str,str2))
return;
free(tok->str);
free(tok->next->str);
tok->str = (char *)malloc(strlen(str1)+strlen(str2)+1);
strcpy(tok->str, str1);
strcat(tok->str, str2);
TOKEN *toknext = tok->next;
tok->next = toknext->next;
delete toknext;
}
//---------------------------------------------------------------------------
void Tokenize(const char FileName[])
{
// Has this file been tokenized already?
for (unsigned int i = 0; i < Files.size(); i++)
{
if ( stricmp(Files[i].c_str(), FileName) == 0 )
return;
}
std::ifstream fin(FileName);
if (!fin.is_open())
return;
unsigned int CurrentFile = Files.size();
Files.push_back(FileName);
unsigned int lineno = 1;
char CurrentToken[1000];
memset(CurrentToken, 0, sizeof(CurrentToken));
char *pToken = CurrentToken;
for (char ch = (char)fin.get(); !fin.eof(); ch = (char)fin.get())
{
if (ch == '#' && !CurrentToken[0])
{
std::string line;
getline(fin,line);
line = "#" + line;
if (strncmp(line.c_str(),"#include",8)==0 &&
line.find("\"") != std::string::npos)
{
// Extract the filename
line.erase(0, line.find("\"")+1);
line.erase(line.find("\""));
// Relative path..
if (strchr(FileName,'\\'))
{
char path[1000];
memset(path,0,sizeof(path));
const char *p = strrchr(FileName, '\\');
memcpy(path, FileName, p-FileName+1);
line = path + line;
}
addtoken("#include", lineno, CurrentFile);
addtoken(line.c_str(), lineno, CurrentFile);
Tokenize(line.c_str());
}
else if (strncmp(line.c_str(), "#define", 7) == 0)
{
char *strId = NULL;
enum {Space1, Id, Space2, Value} State;
State = Space1;
for (unsigned int i = 8; i < line.length(); i++)
{
if (State==Space1 || State==Space2)
{
if (std::isspace(line[i]))
continue;
State = (State==Space1) ? Id : Value;
}
else if (State==Id && std::isspace(line[i]))
{
strId = strdup(CurrentToken);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
State = Space2;
continue;
}
*pToken = line[i];
pToken++;
}
if (State==Value)
{
Define(strId, CurrentToken);
}
pToken = CurrentToken;
memset(CurrentToken, 0, sizeof(CurrentToken));
free(strId);
}
lineno++;
continue;
}
if (ch == '\n')
{
// Add current token..
addtoken(CurrentToken, lineno++, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
// Comments..
if (ch == '/' && !fin.eof())
{
// Add current token..
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
// Read next character..
ch = (char)fin.get();
// If '//'..
if (ch == '/')
{
while (!fin.eof() && (char)fin.get()!='\n');
lineno++;
continue;
}
// If '/*'..
if (ch == '*')
{
char chPrev;
ch = chPrev = 'A';
while (!fin.eof() && (chPrev!='*' || ch!='/'))
{
chPrev = ch;
ch = (char)fin.get();
if (ch == '\n')
lineno++;
}
continue;
}
// Not a comment.. add token..
addtoken("/", lineno, CurrentFile);
}
// char..
if (ch == '\'')
{
// Add previous token
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
// Read this ..
CurrentToken[0] = ch;
CurrentToken[1] = (char)fin.get();
CurrentToken[2] = (char)fin.get();
if (CurrentToken[1] == '\\')
CurrentToken[3] = (char)fin.get();
// Add token and start on next..
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
// String..
if (ch == '\"')
{
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
bool special = false;
char c = ch;
do
{
// Append token..
*pToken = c;
pToken++;
// Special sequence '\.'
if (special)
special = false;
else
special = (c == '\\');
// Get next character
c = (char)fin.get();
}
while (special || c != '\"');
*pToken = '\"';
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
if (strchr("+-*/%&|^?!=<>[](){};:,.",ch))
{
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
CurrentToken[0] = ch;
addtoken(CurrentToken, lineno, CurrentFile);
memset(CurrentToken, 0, sizeof(CurrentToken));
pToken = CurrentToken;
continue;
}
if (std::isspace(ch) || std::iscntrl(ch))
{
addtoken(CurrentToken, lineno, CurrentFile);
pToken = CurrentToken;
memset(CurrentToken, 0, sizeof(CurrentToken));
continue;
}
*pToken = ch;
pToken++;
}
// Combine tokens..
for (TOKEN *tok = tokens; tok && tok->next; tok = tok->next)
{
combine_2tokens(tok, "<", "<");
combine_2tokens(tok, ">", ">");
combine_2tokens(tok, "&", "&");
combine_2tokens(tok, "|", "|");
combine_2tokens(tok, "+", "=");
combine_2tokens(tok, "-", "=");
combine_2tokens(tok, "*", "=");
combine_2tokens(tok, "/", "=");
combine_2tokens(tok, "&", "=");
combine_2tokens(tok, "|", "=");
combine_2tokens(tok, "=", "=");
combine_2tokens(tok, "!", "=");
combine_2tokens(tok, "<", "=");
combine_2tokens(tok, ">", "=");
combine_2tokens(tok, ":", ":");
combine_2tokens(tok, "-", ">");
combine_2tokens(tok, "private", ":");
combine_2tokens(tok, "protected", ":");
combine_2tokens(tok, "public", ":");
}
// Replace constants..
for (TOKEN *tok = tokens; tok; tok = tok->next)
{
if (strcmp(tok->str,"const"))
continue;
const char *sym=NULL, *num=NULL;
if (match(tok,"const int var = num ;"))
{
sym = getstr(tok,2);
num = getstr(tok,4);
}
else if (match(tok,"const unsigned int var = num ;"))
{
sym = getstr(tok,3);
num = getstr(tok,5);
}
if (sym && num)
{
for (TOKEN *tok2 = gettok(tok,6); tok2; tok2 = tok2->next)
{
if (strcmp(tok2->str,sym) == 0)
{
free(tok2->str);
tok2->str = strdup(num);
}
}
}
}
}
//---------------------------------------------------------------------------
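To make the effect of the new preprocessing and the post-passes concrete, here is a small worked example (illustrative only, not part of the commit). Given this input:

#define SIZE 0x10
char buf[SIZE];
if (i >= SIZE) {}

Define() stores SIZE as the decimal string "16" (hexadecimal values are converted via strtoul/itoa), addtoken() substitutes that value wherever the defined name appears, and combine_2tokens() later merges the ">" and "=" tokens, so the resulting token stream is:

char buf [ 16 ] ; if ( i >= 16 ) { }

The "Replace constants" loop at the end performs a similar substitution for variables declared as const int or const unsigned int with a numeric initializer.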

tokenize.h (new file)

@@ -0,0 +1,25 @@
//---------------------------------------------------------------------------
#ifndef tokenizeH
#define tokenizeH
//---------------------------------------------------------------------------
#include <string>
#include <vector>
extern std::vector<std::string> Files;
struct TOKEN
{
unsigned int FileIndex;
char *str;
unsigned int linenr;
struct TOKEN *next;
};
extern struct TOKEN *tokens, *tokens_back;
void Tokenize(const char FileName[]);
//---------------------------------------------------------------------------
#endif
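
For orientation, a minimal sketch of how the interface in tokenize.h could be driven (my addition, not part of the commit; the input file name is hypothetical, and linking the sketch also requires the match(), gettok() and getstr() helpers that tokenize.cpp still expects to be provided elsewhere, currently by main.cpp):

// Minimal usage sketch (illustration only, not part of this commit).
#include <cstdio>
#include "tokenize.h"

int main()
{
    // Tokenize() fills the global 'Files' vector and the 'tokens' list,
    // recursing into any #include "..." files it finds.
    Tokenize("demo.cpp");   // hypothetical input file

    // Walk the singly linked token list and print each token with its origin.
    for (const TOKEN *tok = tokens; tok; tok = tok->next)
    {
        std::printf("%s:%u: %s\n",
                    Files[tok->FileIndex].c_str(),
                    tok->linenr,
                    tok->str);
    }
    return 0;
}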