From 674231ae52680fce13e39392bd942dda1ffcc12b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Fri, 24 Feb 2023 21:43:27 +0100 Subject: [PATCH] added clean C/C++ keyword lists and use them in `TokenList` (#3774) --- Makefile | 6 +- lib/cppcheck.vcxproj | 2 + lib/keywords.cpp | 220 +++++++++++++++++++++++++++++++++++++++++++ lib/keywords.h | 37 ++++++++ lib/lib.pri | 2 + lib/tokenlist.cpp | 114 ++++++---------------- lib/tokenlist.h | 3 - 7 files changed, 293 insertions(+), 91 deletions(-) create mode 100644 lib/keywords.cpp create mode 100644 lib/keywords.h diff --git a/Makefile b/Makefile index 04c6a3cbd..3d8eb300f 100644 --- a/Makefile +++ b/Makefile @@ -227,6 +227,7 @@ LIBOBJ = $(libcppdir)/analyzerinfo.o \ $(libcppdir)/fwdanalysis.o \ $(libcppdir)/importproject.o \ $(libcppdir)/infer.o \ + $(libcppdir)/keywords.o \ $(libcppdir)/library.o \ $(libcppdir)/mathlib.o \ $(libcppdir)/path.o \ @@ -564,6 +565,9 @@ $(libcppdir)/importproject.o: lib/importproject.cpp externals/picojson/picojson. $(libcppdir)/infer.o: lib/infer.cpp lib/calculate.h lib/config.h lib/errortypes.h lib/infer.h lib/mathlib.h lib/valueptr.h lib/vfvalue.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/infer.cpp +$(libcppdir)/keywords.o: lib/keywords.cpp lib/config.h lib/keywords.h lib/standards.h lib/utils.h + $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/keywords.cpp + $(libcppdir)/library.o: lib/library.cpp externals/tinyxml2/tinyxml2.h lib/astutils.h lib/config.h lib/errortypes.h lib/library.h lib/mathlib.h lib/path.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/symboldatabase.h lib/templatesimplifier.h lib/token.h lib/tokenlist.h lib/utils.h lib/valueflow.h lib/vfvalue.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/library.cpp @@ -615,7 +619,7 @@ $(libcppdir)/token.o: lib/token.cpp lib/astutils.h lib/config.h lib/errortypes.h $(libcppdir)/tokenize.o: lib/tokenize.cpp externals/simplecpp/simplecpp.h lib/check.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/preprocessor.h lib/settings.h lib/sourcelocation.h lib/standards.h lib/summaries.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h lib/vfvalue.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/tokenize.cpp -$(libcppdir)/tokenlist.o: lib/tokenlist.cpp externals/simplecpp/simplecpp.h lib/astutils.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenlist.h lib/utils.h lib/vfvalue.h +$(libcppdir)/tokenlist.o: lib/tokenlist.cpp externals/simplecpp/simplecpp.h lib/astutils.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/importproject.h lib/keywords.h lib/library.h lib/mathlib.h lib/path.h lib/platform.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenlist.h lib/utils.h lib/vfvalue.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/tokenlist.cpp $(libcppdir)/utils.o: lib/utils.cpp lib/config.h lib/utils.h diff --git a/lib/cppcheck.vcxproj b/lib/cppcheck.vcxproj index ac3a649c6..c64878713 100644 --- a/lib/cppcheck.vcxproj +++ b/lib/cppcheck.vcxproj @@ -83,6 +83,7 @@ + @@ -149,6 +150,7 @@ + diff --git a/lib/keywords.cpp b/lib/keywords.cpp new file mode 100644 index 000000000..aa5e8c360 --- /dev/null +++ b/lib/keywords.cpp @@ -0,0 +1,220 @@ +/* + * Cppcheck - A tool for static C/C++ code analysis + * Copyright (C) 2007-2023 Cppcheck team. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "keywords.h" + +// see https://en.cppreference.com/w/c/keyword + +#define C90_KEYWORDS \ + "auto", "break", "case", "char", "const", "continue", "default", \ + "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", \ + "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", \ + "union", "unsigned", "void", "volatile", "while" + +#define C99_KEYWORDS \ + "inline", "restrict", "_Bool", "_Complex", "_Imaginary" + +#define C11_KEYWORDS \ + "_Alignas", "_Alignof", "_Atomic", "_Generic", "_Noreturn", "_Static_assert", "_Thread_local" + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-macros" +#endif + +#define C23_KEYWORDS \ + "alignas", "alignof", "bool", "false", "nullptr", "static_assert", "thread_local", "true", "typeof", "typeof_unqual", \ + "_BitInt", "_Decimal128", "_Decimal32", "_Decimal64" + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +static const std::unordered_set c89_keywords_all = { + C90_KEYWORDS +}; + +static const std::unordered_set c89_keywords = c89_keywords_all; + +static const std::unordered_set c99_keywords_all = { + C90_KEYWORDS, C99_KEYWORDS +}; + +static const std::unordered_set c99_keywords = { + C99_KEYWORDS +}; + +static const std::unordered_set c11_keywords_all = { + C90_KEYWORDS, C99_KEYWORDS, C11_KEYWORDS +}; + +static const std::unordered_set c11_keywords = { + C11_KEYWORDS +}; + +/* + static const std::unordered_set c23_keywords_all = { + C90_KEYWORDS, C99_KEYWORDS, C11_KEYWORDS, C23_KEYWORDS + }; + + static const std::unordered_set c23_keywords = { + C23_KEYWORDS + }; + */ + +// see https://en.cppreference.com/w/cpp/keyword + +#define CPP03_KEYWORDS \ + "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", \ + "class", "compl", "const", "const_cast", "continue", "default", \ + "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", \ + "float", "for", "friend", "goto", "if", "inline", "int", "long", \ + "mutable", "namespace", "new", "not", "not_eq", "operator", \ + "or", "or_eq", "private", "protected", "public", "register", "reinterpret_cast", \ + "static_cast", "struct", "switch", "template", "this", "throw", \ + "true", "try", "typedef", "typeid", "typename", "union", "unsigned", "using", \ + "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq" + +#define CPP11_KEYWORDS \ + "alignas", "alignof", "char16_t", "char32_t", "constexpr", "decltype", \ + "noexcept", "nullptr", "static_assert", "thread_local" + +#define CPP20_KEYWORDS \ + "concept", "consteval", "constinit", "co_await", \ + "co_return", "co_yield", "requires" + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-macros" +#endif + +#define CPP_TMTS_KEYWORDS \ + "atomic_cancel", "atomic_commit", "atomic_noexcept", "synchronized" + +#define CPP_REFL_TS_KEYWORDS \ + "reflexpr" + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +static const std::unordered_set cpp03_keywords_all = { + CPP03_KEYWORDS +}; + +static const std::unordered_set cpp03_keywords = cpp03_keywords_all; + +static const std::unordered_set cpp11_keywords_all = { + CPP03_KEYWORDS, CPP11_KEYWORDS +}; + +static const std::unordered_set cpp11_keywords = { + CPP11_KEYWORDS +}; + +static const std::unordered_set cpp14_keywords_all = cpp11_keywords_all; + +static const std::unordered_set cpp14_keywords; + +static const std::unordered_set cpp17_keywords_all = cpp11_keywords_all; + +static const std::unordered_set cpp17_keywords; + +static const std::unordered_set cpp20_keywords_all = { + CPP03_KEYWORDS, CPP11_KEYWORDS, CPP20_KEYWORDS +}; + +static const std::unordered_set cpp20_keywords = { + CPP20_KEYWORDS +}; + +static const std::unordered_set cpp23_keywords; + +static const std::unordered_set cpp23_keywords_all = cpp20_keywords_all; + +// cppcheck-suppress unusedFunction +const std::unordered_set& Keywords::getAll(Standards::cstd_t cStd) +{ + // cppcheck-suppress missingReturn + switch (cStd) { + case Standards::cstd_t::C89: + return c89_keywords_all; + case Standards::cstd_t::C99: + return c99_keywords_all; + case Standards::cstd_t::C11: + return c11_keywords_all; + /*case Standards::cstd_t::C23: + return c23_keywords_all;*/ + } +} + +// cppcheck-suppress unusedFunction +const std::unordered_set& Keywords::getAll(Standards::cppstd_t cppStd) { + // cppcheck-suppress missingReturn + switch (cppStd) { + case Standards::cppstd_t::CPP03: + return cpp03_keywords_all; + case Standards::cppstd_t::CPP11: + return cpp11_keywords_all; + case Standards::cppstd_t::CPP14: + return cpp14_keywords_all; + case Standards::cppstd_t::CPP17: + return cpp17_keywords_all; + case Standards::cppstd_t::CPP20: + return cpp20_keywords_all; + case Standards::cppstd_t::CPP23: + return cpp23_keywords_all; + } +} + +// cppcheck-suppress unusedFunction +const std::unordered_set& Keywords::getOnly(Standards::cstd_t cStd) +{ + // cppcheck-suppress missingReturn + switch (cStd) { + case Standards::cstd_t::C89: + return c89_keywords; + case Standards::cstd_t::C99: + return c99_keywords; + case Standards::cstd_t::C11: + return c11_keywords; + /*case Standards::cstd_t::C23: + return c23_keywords_all;*/ + } +} + +// cppcheck-suppress unusedFunction +const std::unordered_set& Keywords::getOnly(Standards::cppstd_t cppStd) +{ + // cppcheck-suppress missingReturn + switch (cppStd) { + case Standards::cppstd_t::CPP03: + return cpp03_keywords; + case Standards::cppstd_t::CPP11: + return cpp11_keywords; + case Standards::cppstd_t::CPP14: + return cpp14_keywords; + case Standards::cppstd_t::CPP17: + return cpp17_keywords; + case Standards::cppstd_t::CPP20: + return cpp20_keywords; + case Standards::cppstd_t::CPP23: + return cpp23_keywords; + } +} + diff --git a/lib/keywords.h b/lib/keywords.h new file mode 100644 index 000000000..4b03c18aa --- /dev/null +++ b/lib/keywords.h @@ -0,0 +1,37 @@ +/* + * Cppcheck - A tool for static C/C++ code analysis + * Copyright (C) 2007-2023 Cppcheck team. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef keywordsH +#define keywordsH + +#include "standards.h" + +#include +#include + +class Keywords +{ +public: + static const std::unordered_set& getAll(Standards::cstd_t cStd); + static const std::unordered_set& getAll(Standards::cppstd_t cppStd); + + static const std::unordered_set& getOnly(Standards::cstd_t cStd); + static const std::unordered_set& getOnly(Standards::cppstd_t cppStd); +}; + +#endif diff --git a/lib/lib.pri b/lib/lib.pri index cedf747f4..c6347fec6 100644 --- a/lib/lib.pri +++ b/lib/lib.pri @@ -44,6 +44,7 @@ HEADERS += $${PWD}/analyzer.h \ $${PWD}/fwdanalysis.h \ $${PWD}/importproject.h \ $${PWD}/infer.h \ + $${PWD}/keywords.h \ $${PWD}/library.h \ $${PWD}/mathlib.h \ $${PWD}/path.h \ @@ -110,6 +111,7 @@ SOURCES += $${PWD}/analyzerinfo.cpp \ $${PWD}/fwdanalysis.cpp \ $${PWD}/importproject.cpp \ $${PWD}/infer.cpp \ + $${PWD}/keywords.cpp \ $${PWD}/library.cpp \ $${PWD}/mathlib.cpp \ $${PWD}/path.cpp \ diff --git a/lib/tokenlist.cpp b/lib/tokenlist.cpp index a2f44f2ca..645b7addd 100644 --- a/lib/tokenlist.cpp +++ b/lib/tokenlist.cpp @@ -22,6 +22,7 @@ #include "astutils.h" #include "errorlogger.h" #include "errortypes.h" +#include "keywords.h" #include "library.h" #include "mathlib.h" #include "path.h" @@ -52,41 +53,6 @@ TokenList::TokenList(const Settings* settings) : mIsCpp(false) { mTokensFrontBack.list = this; - mKeywords.insert("asm"); - mKeywords.insert("auto"); - mKeywords.insert("break"); - mKeywords.insert("case"); - //mKeywords.insert("char"); // type - mKeywords.insert("const"); - mKeywords.insert("continue"); - mKeywords.insert("default"); - mKeywords.insert("do"); - //mKeywords.insert("double"); // type - mKeywords.insert("else"); - mKeywords.insert("enum"); - mKeywords.insert("extern"); - //mKeywords.insert("float"); // type - mKeywords.insert("for"); - mKeywords.insert("goto"); - mKeywords.insert("if"); - mKeywords.insert("inline"); - //mKeywords.insert("int"); // type - //mKeywords.insert("long"); // type - mKeywords.insert("register"); - mKeywords.insert("restrict"); - mKeywords.insert("return"); - //mKeywords.insert("short"); // type - mKeywords.insert("signed"); - mKeywords.insert("sizeof"); - mKeywords.insert("static"); - mKeywords.insert("struct"); - mKeywords.insert("switch"); - mKeywords.insert("typedef"); - mKeywords.insert("union"); - mKeywords.insert("unsigned"); - mKeywords.insert("void"); - mKeywords.insert("volatile"); - mKeywords.insert("while"); } TokenList::~TokenList() @@ -124,57 +90,6 @@ void TokenList::determineCppC() mIsC = mSettings->enforcedLang == Settings::Language::C || (mSettings->enforcedLang == Settings::Language::None && Path::isC(getSourceFilePath())); mIsCpp = mSettings->enforcedLang == Settings::Language::CPP || (mSettings->enforcedLang == Settings::Language::None && Path::isCPP(getSourceFilePath())); } - - if (mIsCpp) { - //mKeywords.insert("bool"); // type - mKeywords.insert("catch"); - mKeywords.insert("class"); - mKeywords.insert("constexpr"); - mKeywords.insert("const_cast"); - mKeywords.insert("decltype"); - mKeywords.insert("delete"); - mKeywords.insert("dynamic_cast"); - mKeywords.insert("explicit"); - mKeywords.insert("export"); - //mKeywords.insert("false"); // literal - mKeywords.insert("friend"); - mKeywords.insert("mutable"); - mKeywords.insert("namespace"); - mKeywords.insert("new"); - mKeywords.insert("noexcept"); - mKeywords.insert("operator"); - mKeywords.insert("private"); - mKeywords.insert("protected"); - mKeywords.insert("public"); - mKeywords.insert("reinterpret_cast"); - mKeywords.insert("static_assert"); - mKeywords.insert("static_cast"); - mKeywords.insert("template"); - mKeywords.insert("this"); - mKeywords.insert("thread_local"); - mKeywords.insert("throw"); - //mKeywords.insert("true"); // literal - mKeywords.insert("try"); - mKeywords.insert("typeid"); - mKeywords.insert("typename"); - mKeywords.insert("typeof"); - mKeywords.insert("using"); - mKeywords.insert("virtual"); - //mKeywords.insert("wchar_t"); // type - if (!mSettings || mSettings->standards.cpp >= Standards::CPP20) { - mKeywords.insert("alignas"); - mKeywords.insert("alignof"); - mKeywords.insert("axiom"); - mKeywords.insert("co_await"); - mKeywords.insert("co_return"); - mKeywords.insert("co_yield"); - mKeywords.insert("concept"); - mKeywords.insert("synchronized"); - mKeywords.insert("consteval"); - mKeywords.insert("reflexpr"); - mKeywords.insert("requires"); - } - } } int TokenList::appendFileIfNew(std::string fileName) @@ -2050,5 +1965,30 @@ void TokenList::simplifyStdType() bool TokenList::isKeyword(const std::string &str) const { - return mKeywords.find(str) != mKeywords.end(); + if (mIsCpp) { + // TODO: integrate into keywords? + // types and literals are not handled as keywords + static const std::unordered_set cpp_types = {"bool", "false", "true"}; + if (cpp_types.find(str) != cpp_types.end()) + return false; + + // TODO: properly apply configured standard + if (!mSettings || mSettings->standards.cpp >= Standards::CPP20) { + static const auto& cpp20_keywords = Keywords::getAll(Standards::cppstd_t::CPP20); + return cpp20_keywords.find(str) != cpp20_keywords.end(); + } + + static const auto& cpp_keywords = Keywords::getAll(Standards::cppstd_t::CPP11); + return cpp_keywords.find(str) != cpp_keywords.end(); + } + + // TODO: integrate into Keywords? + // types are not handled as keywords + static const std::unordered_set c_types = {"char", "double", "float", "int", "long", "short"}; + if (c_types.find(str) != c_types.end()) + return false; + + // TODO: use configured standard + static const auto& c_keywords = Keywords::getAll(Standards::cstd_t::C99); + return c_keywords.find(str) != c_keywords.end(); } diff --git a/lib/tokenlist.h b/lib/tokenlist.h index 224d6d314..8608b7c28 100644 --- a/lib/tokenlist.h +++ b/lib/tokenlist.h @@ -27,7 +27,6 @@ #include #include #include -#include #include class Settings; @@ -212,8 +211,6 @@ private: /** settings */ const Settings* mSettings; - std::unordered_set mKeywords; - /** File is known to be C/C++ code */ bool mIsC; bool mIsCpp;