2011-01-04 07:50:54 +01:00
|
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
2011-06-27 23:36:23 +02:00
|
|
|
<section>
|
2011-01-04 07:50:54 +01:00
|
|
|
<info>
|
2011-06-27 23:36:23 +02:00
|
|
|
<title>Part 3 - Introduction to writing rules with C++</title>
|
2011-01-04 07:50:54 +01:00
|
|
|
|
|
|
|
<author>
|
|
|
|
<personname><firstname>Daniel</firstname><surname>Marjamäki</surname></personname>
|
|
|
|
|
|
|
|
<affiliation>
|
|
|
|
<orgname>Cppcheck</orgname>
|
|
|
|
</affiliation>
|
|
|
|
</author>
|
|
|
|
|
|
|
|
<pubdate>2011</pubdate>
|
|
|
|
</info>
|
|
|
|
|
|
|
|
<section>
|
|
|
|
<title>Introduction</title>
|
|
|
|
|
|
|
|
<para>The goal of this article is to introduce how
|
2011-07-01 17:06:56 +02:00
|
|
|
Cppcheck rules are written with C++. With C++ it is
|
2011-01-04 07:50:54 +01:00
|
|
|
possible to write more complex rules than is possible with regular
|
|
|
|
expressions.</para>
|
|
|
|
</section>
|
|
|
|
|
|
|
|
<section>
|
|
|
|
<title>Basics</title>
|
|
|
|
|
|
|
|
<para>A C++ rule is written in a C++ function.</para>
|
|
|
|
|
|
|
|
<para>Rules are organized into Check classes. For instance there is a
|
|
|
|
class with the name <literal>CheckStl</literal> that contains various STL
|
|
|
|
rules. The <literal>CheckOther</literal> class can always be used if no other
|
|
|
|
class suits you.</para>
|
|
|
|
|
|
|
|
<para>When you have added your rule you must recompile Cppcheck before you
|
|
|
|
can test it.</para>
|
|
|
|
</section>
|
|
|
|
|
|
|
|
<section>
|
|
|
|
<title>Division by zero</title>
|
|
|
|
|
|
|
|
<para>This simple regular expression will check for division by
|
|
|
|
zero:</para>
|
|
|
|
|
|
|
|
<programlisting>cppcheck --rule="/ 0"</programlisting>
|
|
|
|
|
|
|
|
<para>Here is the corresponding C++ check:</para>
|
|
|
|
|
|
|
|
<programlisting>// Detect division by zero
|
|
|
|
void CheckOther::divisionByZero()
|
|
|
|
{
|
|
|
|
// Loop through all tokens
|
|
|
|
for (const Token *tok = _tokenizer->tokens(); tok; tok = tok->next())
|
|
|
|
{
|
|
|
|
// check if there is a division by zero
|
|
|
|
if (Token::Match(tok, "/ 0"))
|
|
|
|
{
|
|
|
|
// report error
|
|
|
|
divisionByZeroError(tok);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Report error
|
|
|
|
void CheckOther::divisionByZeroError(const Token *tok)
|
|
|
|
{
|
2011-01-08 10:20:29 +01:00
|
|
|
reportError(tok, // location
|
|
|
|
Severity::error, // severity
|
|
|
|
"divisionByZero", // id
|
|
|
|
"Division by zero"); // message
|
2011-01-04 07:50:54 +01:00
|
|
|
}</programlisting>
|
|
|
|
|
|
|
|
<para>The <literal>Token::Match</literal> matches tokens against
|
|
|
|
expressions. A few rules about Token::Match expressions are:</para>
|
|
|
|
|
|
|
|
<itemizedlist>
|
|
|
|
<listitem>
|
|
|
|
<para>Tokens are either completely matched or not matched at all. The
|
|
|
|
token "abc" is not matched by "ab".</para>
|
|
|
|
</listitem>
|
|
|
|
|
|
|
|
<listitem>
|
|
|
|
<para>Spaces are used as separators.</para>
|
|
|
|
</listitem>
|
|
|
|
|
|
|
|
<listitem>
|
|
|
|
<para>With normal regular expressions there are special meanings for +
|
|
|
|
* ? ( ). These are just normal characters in
|
|
|
|
<literal>Token::Match</literal> patterns.</para>
|
|
|
|
</listitem>
|
|
|
|
</itemizedlist>
|
|
|
|
</section>
|
|
|
|
|
|
|
|
<section>
|
|
|
|
<title>Condition before deallocation</title>
|
|
|
|
|
|
|
|
<para>In the first <literal>Writing rules</literal> article I described a
|
|
|
|
rule that looks for redundant conditions. Here is the regular expression
|
|
|
|
that was shown:</para>
|
|
|
|
|
|
|
|
<programlisting>if \( p \) { free \( p \) ; }</programlisting>
|
|
|
|
|
|
|
|
<para>The corresponding <literal>Token::Match</literal> expression
|
|
|
|
is:</para>
|
|
|
|
|
|
|
|
<programlisting>if ( %var% ) { free ( %var% ) ; }</programlisting>
|
|
|
|
|
2011-01-08 11:10:10 +01:00
|
|
|
<para>The <literal>%var%</literal> pattern matches any variable name. Here
|
|
|
|
is a C++ function:</para>
|
2011-01-04 07:50:54 +01:00
|
|
|
|
|
|
|
<programlisting>// Find redundant condition before deallocation
|
|
|
|
void CheckOther::dealloc()
|
|
|
|
{
|
|
|
|
// Loop through all tokens
|
|
|
|
for (const Token *tok = _tokenizer->tokens(); tok; tok = tok->next())
|
|
|
|
{
|
|
|
|
// Is there a condition and a deallocation?
|
|
|
|
if (Token::Match(tok, "if ( %var% ) { free ( %var% ) ; }"))
|
|
|
|
{
|
|
|
|
// Get variable name used in condition:
|
|
|
|
const std::string varname1 = tok->strAt(2);
|
|
|
|
|
|
|
|
// Get variable name used in deallocation:
|
|
|
|
const std::string varname2 = tok->strAt(7);
|
|
|
|
|
|
|
|
// Is the same variable used?
|
|
|
|
if (varname1 == varname2)
|
|
|
|
{
|
|
|
|
// report warning
|
|
|
|
deallocWarning(tok);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Report warning
|
|
|
|
void CheckOther::deallocWarning(const Token *tok)
|
|
|
|
{
|
2011-01-08 10:20:29 +01:00
|
|
|
reportError(tok, // location
|
|
|
|
Severity::warning, // severity
|
|
|
|
"dealloc", // id
|
|
|
|
"Redundant condition"); // message
|
2011-01-04 07:50:54 +01:00
|
|
|
}</programlisting>
|
|
|
|
|
|
|
|
<para>The strAt function is used to fetch strings from the token list. The
|
2011-01-08 10:20:29 +01:00
|
|
|
parameter specifies the token offset. The tokAt function similarly returns the token at the given offset, so the result for "tok->tokAt(1)" is
|
|
|
|
the same as for "tok->next()".</para>
|
|
|
|
</section>
|
|
|
|
|
|
|
|
<section>
|
|
|
|
<title>Validate function parameters</title>
|
|
|
|
|
|
|
|
<para>Sometimes it is known that a function can't handle certain
|
|
|
|
parameters. Here is an example rule that checks that the parameters for
|
|
|
|
strtol or strtoul are valid:</para>
|
|
|
|
|
|
|
|
<programlisting>//---------------------------------------------------------------------------
|
|
|
|
// strtol(str, 0, radix) &lt;- radix must be 0 or 2-36
|
|
|
|
//---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
void CheckOther::invalidFunctionUsage()
|
|
|
|
{
|
|
|
|
// Loop through all tokens
|
|
|
|
for (const Token *tok = _tokenizer->tokens(); tok; tok = tok->next())
|
|
|
|
{
|
|
|
|
// Is there a function call for strtol or strtoul?
|
|
|
|
if (!Token::Match(tok, "strtol|strtoul ("))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Locate the third parameter of the function call..
|
|
|
|
|
|
|
|
// Counter that counts the parameters.
|
|
|
|
int param = 1;
|
|
|
|
|
|
|
|
// Scan the function call tokens. The "tok->tokAt(2)" returns
|
|
|
|
// the token after the "("
|
|
|
|
for (const Token *tok2 = tok->tokAt(2); tok2; tok2 = tok2->next())
|
|
|
|
{
|
|
|
|
// If a "(" is found then jump to the corresponding ")"
|
|
|
|
if (tok2->str() == "(")
|
|
|
|
tok2 = tok2->link();
|
|
|
|
|
|
|
|
// End of function call.
|
|
|
|
else if (tok2->str() == ")")
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Found a ",". increment param counter
|
|
|
|
else if (tok2->str() == ",")
|
|
|
|
{
|
|
|
|
++param;
|
|
|
|
|
|
|
|
// If the param is 3 then check if the parameter is valid
|
|
|
|
if (param == 3)
|
|
|
|
{
|
|
|
|
if (Token::Match(tok2, ", %num% )"))
|
|
|
|
{
|
|
|
|
// convert next token into a number
|
|
|
|
MathLib::bigint radix;
|
|
|
|
radix = MathLib::toLongNumber(tok2->strAt(1));
|
|
|
|
|
|
|
|
// invalid radix?
|
|
|
|
if (!(radix == 0 || (radix &gt;= 2 &amp;&amp; radix &lt;= 36)))
|
|
|
|
{
|
|
|
|
dangerousUsageStrtolError(tok2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CheckOther::dangerousUsageStrtolError(const Token *tok)
|
|
|
|
{
|
|
|
|
reportError(tok, // location
|
|
|
|
Severity::error, // severity
|
|
|
|
"dangerousUsageStrtol", // id
|
|
|
|
"Invalid radix"); // message
|
|
|
|
}</programlisting>
|
|
|
|
|
|
|
|
<para>The link() member function is used to find the corresponding ( ) [ ]
|
|
|
|
or { } token.</para>
|
|
|
|
|
|
|
|
<para>The inner loop is not necessary if you just want to get the last
|
|
|
|
parameter. This code will check if the last parameter is
|
|
|
|
numeric:</para>
|
|
|
|
|
|
|
|
<programlisting>...
|
|
|
|
// Is there a function call?
|
|
|
|
if (!Token::Match(tok, "do_something ("))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (Token::Match(tok->next()->link()->tokAt(-2), "(|, %num% )"))
|
|
|
|
...</programlisting>
|
|
|
|
|
|
|
|
<para>The pattern <literal>(|,</literal> can also be written as
|
|
|
|
<literal>[(,]</literal>.</para>
|
2011-01-04 07:50:54 +01:00
|
|
|
</section>
|
2011-06-27 23:36:23 +02:00
|
|
|
</section>
|