Improved tokenizer to handle '#' better. Previously everything after # was combined into a single token,

now # is treated more like an alphabetic character, with a few exceptions, e.g. "##" tokens.
2009-03-15 23:09:27 +02:00 · 2009-03-15 23:09:27 +02:00 · 21b687b301
parent a6ad972aad
commit 21b687b301
4 changed files with 73 additions and 101 deletions
--- a/src/tokenize.cpp
+++ b/src/tokenize.cpp
@ -177,20 +177,10 @@ void Tokenizer::tokenize(std::istream &code, const char FileName[])
        if (ch < 0)
            continue;
        if (ch == '\n')
        {
            // Add current token..
            addtoken(CurrentToken.c_str(), lineno++, FileIndex);
            CurrentToken.clear();
            continue;
        }
        // char/string..
        if (ch == '\'' || ch == '\"')
        {
-            // Add previous token
+            std::string line;
            addtoken(CurrentToken.c_str(), lineno, FileIndex);
            CurrentToken.clear();
            // read char
            bool special = false;
@ -198,7 +188,7 @@ void Tokenizer::tokenize(std::istream &code, const char FileName[])
            do
            {
                // Append token..
-                CurrentToken += c;
+                line += c;
                if (c == '\n')
                    ++lineno;
@ -213,60 +203,13 @@ void Tokenizer::tokenize(std::istream &code, const char FileName[])
                c = (char)code.get();
            }
            while (code.good() && (special || c != ch));
-            CurrentToken += ch;
+            line += ch;
-            // Add token and start on next..
+            // Handle #file "file.h"
-            addtoken(CurrentToken.c_str(), lineno, FileIndex);
+            if (CurrentToken == "#file")
            CurrentToken.clear();
            continue;
        }
        if (ch == '#' && CurrentToken.empty())
        {
            // If previous token was "#" then append this to create a "##" token
            if (Token::simpleMatch(_tokensBack, "#"))
            {
                _tokensBack->str("##");
                continue;
            }
            std::string line("#");
            {
                char chPrev = '#';
                bool skip = false;
                while (code.good())
                {
                    ch = (char)code.get();
                    if (chPrev != '\\' && ch == '\n')
                        break;
                    if (chPrev == '\\')
                        line += chPrev;
                    if (chPrev == '#' && ch == '#')
                    {
                        addtoken("##", lineno, FileIndex);
                        skip = true;
                        break;
                    }
                    if (ch != ' ')
                        chPrev = ch;
                    if (ch != '\\' && ch != '\n')
                    {
                        line += ch;
                    }
                    if (ch == '\n')
                        ++lineno;
                }
                if (skip)
                    continue;
            }
            if (strncmp(line.c_str(), "#file", 5) == 0 &&
                line.find("\"") != std::string::npos)
            {
                // Extract the filename
-                line.erase(0, line.find("\"") + 1);
+                line = line.substr(1, line.length() - 2);
                if (line.find("\"") != std::string::npos)
                    line.erase(line.find("\""));
                // Has this file been tokenized already?
                ++lineno;
@ -290,33 +233,23 @@ void Tokenizer::tokenize(std::istream &code, const char FileName[])
                }
                lineNumbers.push_back(lineno);
-                lineno = 1;
+                lineno = 0;
                continue;
            }
            else if (strncmp(line.c_str(), "#endfile", 8) == 0)
            {
                if (lineNumbers.empty() || fileIndexes.empty())
                {
                    std::cerr << "####### Preprocessor bug! #######\n";
                    std::exit(0);
                }
                lineno = lineNumbers.back();
                lineNumbers.pop_back();
                FileIndex = fileIndexes.back();
                fileIndexes.pop_back();
                continue;
            }
            else
            {
                // Add previous token
                addtoken(CurrentToken.c_str(), lineno, FileIndex);
                // Add content of the string
                addtoken(line.c_str(), lineno, FileIndex);
            }
            CurrentToken.clear();
            continue;
        }
-        if (strchr("#+-*/%&|^?!=<>[](){};:,.~", ch))
+        if (strchr("+-*/%&|^?!=<>[](){};:,.~\n ", ch))
        {
            if (ch == '.' &&
                CurrentToken.length() > 0 &&
@ -333,8 +266,55 @@ void Tokenizer::tokenize(std::istream &code, const char FileName[])
            }
            else
            {
-                addtoken(CurrentToken.c_str(), lineno, FileIndex);
+                if (CurrentToken == "#file")
                {
                    // Handle this where strings are handled
                    continue;
                }
                else if (CurrentToken == "#endfile")
                {
                    if (lineNumbers.empty() || fileIndexes.empty())
                    {
                        std::cerr << "####### Preprocessor bug! #######\n";
                        std::exit(0);
                    }
                    lineno = lineNumbers.back();
                    lineNumbers.pop_back();
                    FileIndex = fileIndexes.back();
                    fileIndexes.pop_back();
                    CurrentToken.clear();
                    continue;
                }
                // If token contains # characters, split it up
                std::string temp;
                for (std::string::size_type i = 0; i < CurrentToken.length(); ++i)
                {
                    if (CurrentToken[i] == '#' && CurrentToken.length() + 1 > i && CurrentToken[i+1] == '#')
                    {
                        addtoken(temp.c_str(), lineno, FileIndex);
                        temp.clear();
                        addtoken("##", lineno, FileIndex);
                        ++i;
                    }
                    else
                        temp += CurrentToken[i];
                }
                addtoken(temp.c_str(), lineno, FileIndex);
                CurrentToken.clear();
                if (ch == '\n')
                {
                    ++lineno;
                    continue;
                }
                else if (ch == ' ')
                {
                    continue;
                }
                CurrentToken += ch;
                // Add "++", "--" or ">>" token
                if ((ch == '+' || ch == '-' || ch == '>') && (code.peek() == ch))
@ -345,14 +325,6 @@ void Tokenizer::tokenize(std::istream &code, const char FileName[])
            }
        }
        if (std::isspace(ch) || std::iscntrl(ch))
        {
            addtoken(CurrentToken.c_str(), lineno, FileIndex);
            CurrentToken.clear();
            continue;
        }
        CurrentToken += ch;
    }
    addtoken(CurrentToken.c_str(), lineno, FileIndex);
--- a/test/testmemleak.cpp
+++ b/test/testmemleak.cpp
@ -1522,8 +1522,8 @@ private:
        check("class A\n"
              "{\n"
              "public:\n"
-              "	   int * p;\n"
+              "    int * p;\n"
-              "	   A() { p = new int; }\n"
+              "    A() { p = new int; }\n"
              "};\n", true);
        ASSERT_EQUALS("[test.cpp:4]: (all) Memory leak: A::p\n", errout.str());
    }
@ -1533,8 +1533,8 @@ private:
        check("class A\n"
              "{\n"
              "public:\n"
-              "	   int * p;\n"
+              "    int * p;\n"
-              "	   A();\n"
+              "    A();\n"
              "};\n"
              "A::A() : p(new int[10])\n"
              "{ }", true);
--- a/test/testpreprocessor.cpp
+++ b/test/testpreprocessor.cpp
@ -107,7 +107,7 @@ private:
        TEST_CASE(multi_character_character);
        TEST_CASE(stringify);
-        // TODO TEST_CASE(stringify2);
+        TEST_CASE(stringify2);
        TEST_CASE(ifdefwithfile);
        TEST_CASE(pragma);
    }
--- a/test/testtokenize.cpp
+++ b/test/testtokenize.cpp
@ -912,10 +912,10 @@ private:
    {
        const std::string code("void func()\n"
                               "{\n"
-                               "	char a[256] = \"test\";\n"
+                               "char a[256] = \"test\";\n"
-                               "	{\n"
+                               "{\n"
-                               "		char b[256] = \"test\";\n"
+                               "char b[256] = \"test\";\n"
-                               "	}\n"
+                               "}\n"
                               "}\n");
        // tokenize..