Reduced Token::Match complexity, better support for complex patterns

This commit is contained in:
PKEuS 2014-06-26 18:17:05 +02:00
parent 817d32f791
commit b4f0869a71
3 changed files with 173 additions and 228 deletions

View File

@ -279,69 +279,150 @@ const std::string &Token::strAt(int index) const
return tok ? tok->_str : emptyString; return tok ? tok->_str : emptyString;
} }
static int multiComparePercent(const Token *tok, const char ** haystack_p, bool emptyStringFound) static int multiComparePercent(const Token *tok, const char*& haystack, bool emptyStringFound, unsigned int varid)
{ {
const char *haystack = *haystack_p; ++haystack;
// Compare only the first character of the string for optimization reasons
if (haystack[0] == '%' && haystack[1] != '|' && haystack[1] != '\0' && haystack[1] != ' ') { switch (haystack[0]) {
if (haystack[1] == 'o' && // "%op%" case '\0':
haystack[2] == 'p' && case ' ':
haystack[3] == '%') { case '|':
if (tok->isOp()) //simple '%' character
return 1; haystack += 1;
*haystack_p = haystack = haystack + 4; if (tok->isArithmeticalOp() && tok->str() == "%")
} else if (haystack[1] == 'c' && // "%cop%" return 1;
haystack[2] == 'o' && break;
haystack[3] == 'p' && case 'v':
haystack[4] == '%') { // TODO: %var% should match only for
if (tok->isConstOp()) // variables that have varId != 0, but that needs a lot of
return 1; // work, before that change can be made.
*haystack_p = haystack = haystack + 5; // Any symbolname..
} else if (haystack[1] == 'o' && // "%or%" if (haystack[3] == '%') { // %var%
haystack[2] == 'r' && haystack += 4;
haystack[3] == '%') {
if (tok->type() == Token::eBitOp && tok->str() == "|")
return 1;
*haystack_p = haystack = haystack + 4;
} else if (haystack[1] == 'o' && // "%oror%"
haystack[2] == 'r' &&
haystack[3] == 'o' &&
haystack[4] == 'r' &&
haystack[5] == '%') {
if (tok->type() == Token::eLogicalOp && tok->str() == "||")
return 1;
*haystack_p = haystack = haystack + 6;
} else if (haystack[1] == 'v' && // "%var%"
haystack[2] == 'a' &&
haystack[3] == 'r' &&
haystack[4] == '%') {
if (tok->isName()) if (tok->isName())
return 1; return 1;
*haystack_p = haystack = haystack + 5; } else { // %varid%
if (varid == 0) {
throw InternalError(tok, "Internal error. Token::Match called with varid 0. Please report this to Cppcheck developers");
}
haystack += 6;
if (tok->varId() == varid)
return 1;
}
break;
case 't':
// Type (%type%)
{
haystack += 5;
if (tok->isName() && tok->varId() == 0 && !tok->isKeyword())
return 1;
}
break;
case 'a':
// Accept any token (%any%)
{
haystack += 4;
return 1;
}
break;
case 'n':
// Number (%num%)
{
haystack += 4;
if (tok->isNumber())
return 1;
}
break;
case 'c': {
haystack += 1;
// Character (%char%)
if (haystack[0] == 'h') {
haystack += 4;
if (tok->type() == Token::eChar)
return 1;
}
// Const operator (%cop%)
else if (haystack[1] == 'p') {
haystack += 3;
if (tok->isConstOp())
return 1;
}
// Comparison operator (%comp%)
else {
haystack += 4;
if (tok->isComparisonOp())
return 1;
}
}
break;
case 's':
// String (%str%)
{
haystack += 4;
if (tok->type() == Token::eString)
return 1;
}
break;
case 'b':
// Bool (%bool%)
{
haystack += 5;
if (tok->isBoolean())
return 1;
}
break;
case 'o': {
++haystack;
if (haystack[1] == '%') {
// Op (%op%)
if (haystack[0] == 'p') {
haystack += 2;
if (tok->isOp())
return 1;
}
// Or (%or%)
else {
haystack += 2;
if (tok->type() == Token::eBitOp && tok->str() == "|")
return 1;
}
} }
if (*haystack == '|') // Oror (%oror%)
*haystack_p = haystack = haystack + 1; else {
else if (*haystack == ' ' || *haystack == '\0') haystack += 4;
return emptyStringFound ? 0 : -1; if (tok->type() == Token::eLogicalOp && tok->str() == "||")
else return 1;
return -1; }
} }
break;
default:
//unknown %cmd%, abort
std::abort();
}
if (*haystack == '|')
haystack += 1;
else if (*haystack == ' ' || *haystack == '\0')
return emptyStringFound ? 0 : -1;
else
return -1;
return 0xFFFF; return 0xFFFF;
} }
int Token::multiCompare(const Token *tok, const char *haystack) int Token::multiCompare(const Token *tok, const char *haystack, unsigned int varid)
{ {
bool emptyStringFound = false; bool emptyStringFound = false;
const char *needle = tok->str().c_str(); const char *needle = tok->str().c_str();
const char *needlePointer = needle; const char *needlePointer = needle;
for (;;) { for (;;) {
if (*needlePointer == *haystack) { if (needlePointer == needle && haystack[0] == '%' && haystack[1] != '|' && haystack[1] != '\0' && haystack[1] != ' ') {
if (*needlePointer == '\0') int ret = multiComparePercent(tok, haystack, emptyStringFound, varid);
return 1; if (ret < 2)
++needlePointer; return ret;
++haystack;
} else if (*haystack == '|') { } else if (*haystack == '|') {
if (*needlePointer == 0) { if (*needlePointer == 0) {
// If needle is at the end, we have a match. // If needle is at the end, we have a match.
@ -354,14 +435,15 @@ int Token::multiCompare(const Token *tok, const char *haystack)
needlePointer = needle; needlePointer = needle;
++haystack; ++haystack;
} else if (*needlePointer == *haystack) {
if (*needlePointer == '\0')
return 1;
++needlePointer;
++haystack;
} else if (*haystack == ' ' || *haystack == '\0') { } else if (*haystack == ' ' || *haystack == '\0') {
if (needlePointer == needle) if (needlePointer == needle)
return 0; return 0;
break; break;
} else if (haystack[0] == '%' && haystack[1] != '|' && haystack[1] != '\0' && haystack[1] != ' ') {
int ret = multiComparePercent(tok, &haystack, emptyStringFound);
if (ret < 2)
return ret;
} }
// If haystack and needle don't share the same character, // If haystack and needle don't share the same character,
// find next '|' character. // find next '|' character.
@ -461,26 +543,9 @@ int Token::firstWordLen(const char *str)
return len; return len;
} }
// multicompare(p, cond, ismulticomp): statement macro that handles one
// alternative of a '|'-separated pattern word.
//
//   p           - pattern cursor (lvalue char pointer); advanced by the macro.
//   cond        - boolean expression: does the current token satisfy this
//                 alternative?
//   ismulticomp - bool lvalue; set to true only when this alternative failed
//                 and another alternative follows the '|'.
//
// If cond is false:
//   - when *p is not '|', there is no further alternative and the macro
//     executes `return false` in the enclosing function;
//   - otherwise it steps over the '|', sets ismulticomp to whether p now
//     points at something other than a space or the terminating NUL, and
//     executes `continue` in the enclosing loop.
// If cond is true:
//   - when *p is '|', the remaining alternatives are skipped by advancing p
//     to the next space or NUL;
//   - ismulticomp is cleared.
//
// Because the expansion contains `return false` and `continue`, it may only
// be used inside a loop in a function that can return false.
#define multicompare(p,cond,ismulticomp) \
{ \
if (!(cond)) { \
if (*(p) != '|') \
return false; \
++(p); \
(ismulticomp) = (*(p) && *(p) != ' '); \
continue; \
} \
if (*(p) == '|') { \
while (*(p) && *(p) != ' ') \
++(p); \
} \
(ismulticomp) = false; \
}
bool Token::Match(const Token *tok, const char pattern[], unsigned int varid) bool Token::Match(const Token *tok, const char pattern[], unsigned int varid)
{ {
const char *p = pattern; const char *p = pattern;
bool ismulticomp = false;
while (*p) { while (*p) {
// Skip spaces in pattern.. // Skip spaces in pattern..
while (*p == ' ') while (*p == ' ')
@ -500,131 +565,8 @@ bool Token::Match(const Token *tok, const char pattern[], unsigned int varid)
return false; return false;
} }
// Compare the first character of the string for optimization reasons
// before doing more detailed checks.
if (p[0] == '%') {
++p;
switch (p[0]) {
case '\0':
case ' ':
case '|':
//simple '%' character
{
multicompare(p, tok->str() == "%", ismulticomp);
}
break;
case 'v':
// TODO: %var% should match only for
// variables that have varId != 0, but that needs a lot of
// work, before that change can be made.
// Any symbolname..
if (p[3] == '%') { // %var%
p += 4;
multicompare(p,tok->isName(),ismulticomp);
} else { // %varid%
if (varid == 0) {
throw InternalError(tok, "Internal error. Token::Match called with varid 0. Please report this to Cppcheck developers");
}
if (tok->varId() != varid)
return false;
p += 6;
}
break;
case 't':
// Type (%type%)
{
p += 5;
multicompare(p, tok->isName() && tok->varId() == 0 && !tok->isKeyword(), ismulticomp);
}
break;
case 'a':
// Accept any token (%any%)
{
p += 4;
if (p[0] == '|') {
while (*p && *p != ' ')
++p;
}
ismulticomp = false;
}
break;
case 'n':
// Number (%num%)
{
p += 4;
multicompare(p,tok->isNumber(),ismulticomp);
}
break;
case 'c': {
p += 1;
// Character (%char%)
if (p[0] == 'h') {
p += 4;
multicompare(p,tok->type() == eChar,ismulticomp);
}
// Const operator (%cop%)
else if (p[1] == 'p') {
p += 3;
multicompare(p,tok->isConstOp(),ismulticomp);
}
// Comparison operator (%comp%)
else {
p += 4;
multicompare(p,tok->isComparisonOp(),ismulticomp);
}
}
break;
case 's':
// String (%str%)
{
p += 4;
multicompare(p,tok->type() == eString,ismulticomp);
}
break;
case 'b':
// Bool (%bool%)
{
p += 5;
multicompare(p,tok->isBoolean(),ismulticomp);
}
break;
case 'o': {
++p;
if (p[1] == '%') {
// Op (%op%)
if (p[0] == 'p') {
p += 2;
multicompare(p,tok->isOp(),ismulticomp);
}
// Or (%or%)
else {
p += 2;
multicompare(p,tok->str() == "|",ismulticomp)
}
}
// Oror (%oror%)
else {
p += 4;
multicompare(p,tok->str() == "||",ismulticomp);
}
}
break;
default:
//unknown %cmd%, abort
std::abort();
}
}
else if (ismulticomp) {
ismulticomp = false;
continue;
}
// [.. => search for a one-character token.. // [.. => search for a one-character token..
else if (p[0] == '[' && chrInFirstWord(p, ']')) { if (p[0] == '[' && chrInFirstWord(p, ']')) {
if (tok->str().length() != 1) if (tok->str().length() != 1)
return false; return false;
@ -655,20 +597,6 @@ bool Token::Match(const Token *tok, const char pattern[], unsigned int varid)
++p; ++p;
} }
// Parse multi options, such as void|int|char (accept token which is one of these 3)
else if (chrInFirstWord(p, '|') && (p[0] != '|' || firstWordLen(p) > 2)) {
int res = multiCompare(tok, p);
if (res == 0) {
// Empty alternative matches, use the same token on next round
while (*p && *p != ' ')
++p;
continue;
} else if (res == -1) {
// No match
return false;
}
}
// Parse "not" options. Token can be anything except the given one // Parse "not" options. Token can be anything except the given one
else if (p[0] == '!' && p[1] == '!' && p[2] != '\0') { else if (p[0] == '!' && p[1] == '!' && p[2] != '\0') {
p += 2; p += 2;
@ -678,8 +606,18 @@ bool Token::Match(const Token *tok, const char pattern[], unsigned int varid)
++p; ++p;
} }
else if (!firstWordEquals(p, tok->_str.c_str())) { // Parse multi options, such as void|int|char (accept token which is one of these 3)
return false; else {
int res = multiCompare(tok, p, varid);
if (res == 0) {
// Empty alternative matches, use the same token on next round
while (*p && *p != ' ')
++p;
continue;
} else if (res == -1) {
// No match
return false;
}
} }
while (*p && *p != ' ') while (*p && *p != ' ')

View File

@ -377,7 +377,7 @@ public:
* 0 if needle was empty string * 0 if needle was empty string
* -1 if needle was not found * -1 if needle was not found
*/ */
static int multiCompare(const Token *needle, const char *haystack); static int multiCompare(const Token *needle, const char *haystack, unsigned int varid);
unsigned int linenr() const { unsigned int linenr() const {
return _linenr; return _linenr;

View File

@ -46,6 +46,7 @@ private:
TEST_CASE(multiCompare2); // #3294 - false negative multi compare between "=" and "==" TEST_CASE(multiCompare2); // #3294 - false negative multi compare between "=" and "=="
TEST_CASE(multiCompare3); // false positive for %or% on code using "|=" TEST_CASE(multiCompare3); // false positive for %or% on code using "|="
TEST_CASE(multiCompare4); TEST_CASE(multiCompare4);
TEST_CASE(multiCompare5);
TEST_CASE(getStrLength); TEST_CASE(getStrLength);
TEST_CASE(strValue); TEST_CASE(strValue);
@ -126,52 +127,52 @@ private:
// Test for found // Test for found
Token one(0); Token one(0);
one.str("one"); one.str("one");
ASSERT_EQUALS(1, Token::multiCompare(&one, "one|two")); ASSERT_EQUALS(1, Token::multiCompare(&one, "one|two", 0));
Token two(0); Token two(0);
two.str("two"); two.str("two");
ASSERT_EQUALS(1, Token::multiCompare(&two, "one|two")); ASSERT_EQUALS(1, Token::multiCompare(&two, "one|two", 0));
ASSERT_EQUALS(1, Token::multiCompare(&two, "verybig|two|")); ASSERT_EQUALS(1, Token::multiCompare(&two, "verybig|two|", 0));
// Test for empty string found // Test for empty string found
Token notfound(0); Token notfound(0);
notfound.str("notfound"); notfound.str("notfound");
ASSERT_EQUALS(0, Token::multiCompare(&notfound, "|one|two")); ASSERT_EQUALS(0, Token::multiCompare(&notfound, "|one|two", 0));
ASSERT_EQUALS(0, Token::multiCompare(&notfound, "one||two")); ASSERT_EQUALS(0, Token::multiCompare(&notfound, "one||two", 0));
ASSERT_EQUALS(0, Token::multiCompare(&notfound, "one|two|")); ASSERT_EQUALS(0, Token::multiCompare(&notfound, "one|two|", 0));
// Test for not found // Test for not found
ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&notfound, "one|two"))); ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&notfound, "one|two", 0)));
Token s(0); Token s(0);
s.str("s"); s.str("s");
ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&s, "verybig|two"))); ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&s, "verybig|two", 0)));
Token ne(0); Token ne(0);
ne.str("ne"); ne.str("ne");
ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&ne, "one|two"))); ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&ne, "one|two", 0)));
Token a(0); Token a(0);
a.str("a"); a.str("a");
ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&a, "abc|def"))); ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&a, "abc|def", 0)));
Token abcd(0); Token abcd(0);
abcd.str("abcd"); abcd.str("abcd");
ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&abcd, "abc|def"))); ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&abcd, "abc|def", 0)));
Token def(0); Token def(0);
def.str("default"); def.str("default");
ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&def, "abc|def"))); ASSERT_EQUALS(static_cast<unsigned int>(-1), static_cast<unsigned int>(Token::multiCompare(&def, "abc|def", 0)));
// %op% // %op%
Token plus(0); Token plus(0);
plus.str("+"); plus.str("+");
ASSERT_EQUALS(1, Token::multiCompare(&plus, "one|%op%")); ASSERT_EQUALS(1, Token::multiCompare(&plus, "one|%op%", 0));
ASSERT_EQUALS(1, Token::multiCompare(&plus, "%op%|two")); ASSERT_EQUALS(1, Token::multiCompare(&plus, "%op%|two", 0));
Token x(0); Token x(0);
x.str("x"); x.str("x");
ASSERT_EQUALS(-1, Token::multiCompare(&x, "one|%op%")); ASSERT_EQUALS(-1, Token::multiCompare(&x, "one|%op%", 0));
ASSERT_EQUALS(-1, Token::multiCompare(&x, "%op%|two")); ASSERT_EQUALS(-1, Token::multiCompare(&x, "%op%|two", 0));
} }
void multiCompare2() const { // #3294 void multiCompare2() const { // #3294
@ -197,8 +198,8 @@ private:
ASSERT_EQUALS(false, Token::Match(toks3.tokens(), "return %var% xyz|%or% %var% ;")); ASSERT_EQUALS(false, Token::Match(toks3.tokens(), "return %var% xyz|%or% %var% ;"));
ASSERT_EQUALS(false, Token::Match(toks3.tokens(), "return %var% %or%|xyz %var% ;")); ASSERT_EQUALS(false, Token::Match(toks3.tokens(), "return %var% %or%|xyz %var% ;"));
ASSERT_EQUALS(true, Token::Match(toks3.tokens(), "return %var% xyz|%oror% %var% ;")); ASSERT_EQUALS(true, Token::Match(toks3.tokens(), "return %var% xyz|%oror% %var% ;"));
ASSERT_EQUALS(true, Token::Match(toks3.tokens(), "return %var% %oror%|xyz %var% ;")); ASSERT_EQUALS(true, Token::Match(toks3.tokens(), "return %var% %oror%|xyz %var% ;"));
givenACodeSampleToTokenize toks4("a % b ;", true); givenACodeSampleToTokenize toks4("a % b ;", true);
ASSERT_EQUALS(true, Token::Match(toks4.tokens(), "%var% >>|<<|&|%or%|^|% %var% ;")); ASSERT_EQUALS(true, Token::Match(toks4.tokens(), "%var% >>|<<|&|%or%|^|% %var% ;"));
@ -243,6 +244,12 @@ private:
ASSERT_EQUALS(false, Token::Match(var.tokens(), "std :: queue %op%|x")); ASSERT_EQUALS(false, Token::Match(var.tokens(), "std :: queue %op%|x"));
} }
void multiCompare5() const {
Token tok(0);
tok.str("||");
ASSERT_EQUALS(true, Token::multiCompare(&tok, "+|%or%|%oror%", 0) >= 0);
}
void getStrLength() const { void getStrLength() const {
Token tok(0); Token tok(0);