Improvements for PCRE API usage (HAVE_RULES=yes) (#1384)
* PCRE: added pcre_study to improve regex matching speed. * PCRE: catch return values from pcre_exec and return an error message in case it fails. * Formatted the code. There are no functional changes intended. * PCRE: decode internal PCRE error messages. * Integrating comments from PR. * PCRE: Use pcre_study() only in case PCRE_CONFIG_JIT is defined. * PCRE: Fixed potential resource leak. In case pcre_compile() worked, but pcre_study() returns an error, the resources allocated by pcre_compile() are freed. * Make travis happy. * PCRE: Improved output message format. * PCRE: Attempt to be compatible with older PCRE versions and fix travis build.
This commit is contained in:
parent
6bde2445a6
commit
29d7872440
192
lib/cppcheck.cpp
192
lib/cppcheck.cpp
|
@ -590,6 +590,140 @@ void CppCheck::checkSimplifiedTokens(const Tokenizer &tokenizer)
|
||||||
executeRules("simple", tokenizer);
|
executeRules("simple", tokenizer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_RULES
|
||||||
|
|
||||||
|
static const char * pcreErrorCodeToString(const int pcreExecRet)
|
||||||
|
{
|
||||||
|
switch (pcreExecRet) {
|
||||||
|
case PCRE_ERROR_NULL:
|
||||||
|
return "Either code or subject was passed as NULL, or ovector was NULL "
|
||||||
|
"and ovecsize was not zero (PCRE_ERROR_NULL)";
|
||||||
|
case PCRE_ERROR_BADOPTION:
|
||||||
|
return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)";
|
||||||
|
case PCRE_ERROR_BADMAGIC:
|
||||||
|
return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, "
|
||||||
|
"to catch the case when it is passed a junk pointer and to detect when a "
|
||||||
|
"pattern that was compiled in an environment of one endianness is run in "
|
||||||
|
"an environment with the other endianness. This is the error that PCRE "
|
||||||
|
"gives when the magic number is not present (PCRE_ERROR_BADMAGIC)";
|
||||||
|
case PCRE_ERROR_UNKNOWN_NODE:
|
||||||
|
return "While running the pattern match, an unknown item was encountered in the "
|
||||||
|
"compiled pattern. This error could be caused by a bug in PCRE or by "
|
||||||
|
"overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)";
|
||||||
|
case PCRE_ERROR_NOMEMORY:
|
||||||
|
return "If a pattern contains back references, but the ovector that is passed "
|
||||||
|
"to pcre_exec() is not big enough to remember the referenced substrings, "
|
||||||
|
"PCRE gets a block of memory at the start of matching to use for this purpose. "
|
||||||
|
"If the call via pcre_malloc() fails, this error is given. The memory is "
|
||||||
|
"automatically freed at the end of matching. This error is also given if "
|
||||||
|
"pcre_stack_malloc() fails in pcre_exec(). "
|
||||||
|
"This can happen only when PCRE has been compiled with "
|
||||||
|
"--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)";
|
||||||
|
case PCRE_ERROR_NOSUBSTRING:
|
||||||
|
return "This error is used by the pcre_copy_substring(), pcre_get_substring(), "
|
||||||
|
"and pcre_get_substring_list() functions (see below). "
|
||||||
|
"It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)";
|
||||||
|
case PCRE_ERROR_MATCHLIMIT:
|
||||||
|
return "The backtracking limit, as specified by the match_limit field in a pcre_extra "
|
||||||
|
"structure (or defaulted) was reached. "
|
||||||
|
"See the description above (PCRE_ERROR_MATCHLIMIT)";
|
||||||
|
case PCRE_ERROR_CALLOUT:
|
||||||
|
return "This error is never generated by pcre_exec() itself. "
|
||||||
|
"It is provided for use by callout functions that want to yield a distinctive "
|
||||||
|
"error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)";
|
||||||
|
case PCRE_ERROR_BADUTF8:
|
||||||
|
return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, "
|
||||||
|
"and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector "
|
||||||
|
"(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 "
|
||||||
|
"character is placed in the first element, and a reason code is placed in the "
|
||||||
|
"second element. The reason codes are listed in the following section. For "
|
||||||
|
"backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated "
|
||||||
|
"UTF-8 character at the end of the subject (reason codes 1 to 5), "
|
||||||
|
"PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8";
|
||||||
|
case PCRE_ERROR_BADUTF8_OFFSET:
|
||||||
|
return "The UTF-8 byte sequence that was passed as a subject was checked and found to "
|
||||||
|
"be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of "
|
||||||
|
"startoffset did not point to the beginning of a UTF-8 character or the end of "
|
||||||
|
"the subject (PCRE_ERROR_BADUTF8_OFFSET)";
|
||||||
|
case PCRE_ERROR_PARTIAL:
|
||||||
|
return "The subject string did not match, but it did match partially. See the "
|
||||||
|
"pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)";
|
||||||
|
case PCRE_ERROR_BADPARTIAL:
|
||||||
|
return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL "
|
||||||
|
"option was used with a compiled pattern containing items that were not supported "
|
||||||
|
"for partial matching. From release 8.00 onwards, there are no restrictions on "
|
||||||
|
"partial matching (PCRE_ERROR_BADPARTIAL)";
|
||||||
|
case PCRE_ERROR_INTERNAL:
|
||||||
|
return "An unexpected internal error has occurred. This error could be caused by a bug "
|
||||||
|
"in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)";
|
||||||
|
case PCRE_ERROR_BADCOUNT:
|
||||||
|
return"This error is given if the value of the ovecsize argument is negative "
|
||||||
|
"(PCRE_ERROR_BADCOUNT)";
|
||||||
|
case PCRE_ERROR_RECURSIONLIMIT :
|
||||||
|
return "The internal recursion limit, as specified by the match_limit_recursion "
|
||||||
|
"field in a pcre_extra structure (or defaulted) was reached. "
|
||||||
|
"See the description above (PCRE_ERROR_RECURSIONLIMIT)";
|
||||||
|
case PCRE_ERROR_DFA_UITEM:
|
||||||
|
return "PCRE_ERROR_DFA_UITEM";
|
||||||
|
case PCRE_ERROR_DFA_UCOND:
|
||||||
|
return "PCRE_ERROR_DFA_UCOND";
|
||||||
|
case PCRE_ERROR_DFA_WSSIZE:
|
||||||
|
return "PCRE_ERROR_DFA_WSSIZE";
|
||||||
|
case PCRE_ERROR_DFA_RECURSE:
|
||||||
|
return "PCRE_ERROR_DFA_RECURSE";
|
||||||
|
case PCRE_ERROR_NULLWSLIMIT:
|
||||||
|
return "PCRE_ERROR_NULLWSLIMIT";
|
||||||
|
case PCRE_ERROR_BADNEWLINE:
|
||||||
|
return "An invalid combination of PCRE_NEWLINE_xxx options was "
|
||||||
|
"given (PCRE_ERROR_BADNEWLINE)";
|
||||||
|
case PCRE_ERROR_BADOFFSET:
|
||||||
|
return "The value of startoffset was negative or greater than the length "
|
||||||
|
"of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)";
|
||||||
|
case PCRE_ERROR_SHORTUTF8:
|
||||||
|
return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject "
|
||||||
|
"string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. "
|
||||||
|
"Information about the failure is returned as for PCRE_ERROR_BADUTF8. "
|
||||||
|
"It is in fact sufficient to detect this case, but this special error code for "
|
||||||
|
"PCRE_PARTIAL_HARD precedes the implementation of returned information; "
|
||||||
|
"it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)";
|
||||||
|
case PCRE_ERROR_RECURSELOOP:
|
||||||
|
return "This error is returned when pcre_exec() detects a recursion loop "
|
||||||
|
"within the pattern. Specifically, it means that either the whole pattern "
|
||||||
|
"or a subpattern has been called recursively for the second time at the same "
|
||||||
|
"position in the subject string. Some simple patterns that might do this "
|
||||||
|
"are detected and faulted at compile time, but more complicated cases, "
|
||||||
|
"in particular mutual recursions between two different subpatterns, "
|
||||||
|
"cannot be detected until run time (PCRE_ERROR_RECURSELOOP)";
|
||||||
|
case PCRE_ERROR_JIT_STACKLIMIT:
|
||||||
|
return "This error is returned when a pattern that was successfully studied "
|
||||||
|
"using a JIT compile option is being matched, but the memory available "
|
||||||
|
"for the just-in-time processing stack is not large enough. See the pcrejit "
|
||||||
|
"documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)";
|
||||||
|
case PCRE_ERROR_BADMODE:
|
||||||
|
return "This error is given if a pattern that was compiled by the 8-bit library "
|
||||||
|
"is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)";
|
||||||
|
case PCRE_ERROR_BADENDIANNESS:
|
||||||
|
return "This error is given if a pattern that was compiled and saved is reloaded on a "
|
||||||
|
"host with different endianness. The utility function pcre_pattern_to_host_byte_order() "
|
||||||
|
"can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)";
|
||||||
|
case PCRE_ERROR_DFA_BADRESTART:
|
||||||
|
return "PCRE_ERROR_DFA_BADRESTART";
|
||||||
|
#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
|
||||||
|
case PCRE_ERROR_BADLENGTH:
|
||||||
|
return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)";
|
||||||
|
case PCRE_ERROR_JIT_BADOPTION:
|
||||||
|
return "This error is returned when a pattern that was successfully studied using a JIT compile "
|
||||||
|
"option is being matched, but the matching mode (partial or complete match) does not correspond "
|
||||||
|
"to any JIT compilation mode. When the JIT fast path function is used, this error may be "
|
||||||
|
"also given for invalid options. See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HAVE_RULES
|
||||||
|
|
||||||
|
|
||||||
void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &tokenizer)
|
void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &tokenizer)
|
||||||
{
|
{
|
||||||
(void)tokenlist;
|
(void)tokenlist;
|
||||||
|
@ -618,15 +752,16 @@ void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &token
|
||||||
if (rule.pattern.empty() || rule.id.empty() || rule.severity == Severity::none || rule.tokenlist != tokenlist)
|
if (rule.pattern.empty() || rule.id.empty() || rule.severity == Severity::none || rule.tokenlist != tokenlist)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const char *error = nullptr;
|
const char *pcreCompileErrorStr = nullptr;
|
||||||
int erroffset = 0;
|
int erroffset = 0;
|
||||||
pcre *re = pcre_compile(rule.pattern.c_str(),0,&error,&erroffset,nullptr);
|
pcre * const re = pcre_compile(rule.pattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
|
||||||
if (!re) {
|
if (!re) {
|
||||||
if (error) {
|
if (pcreCompileErrorStr) {
|
||||||
ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
|
const std::string msg = "pcre_compile failed: " + std::string(pcreCompileErrorStr);
|
||||||
|
const ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
|
||||||
emptyString,
|
emptyString,
|
||||||
Severity::error,
|
Severity::error,
|
||||||
error,
|
msg,
|
||||||
"pcre_compile",
|
"pcre_compile",
|
||||||
false);
|
false);
|
||||||
|
|
||||||
|
@ -635,9 +770,49 @@ void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &token
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Optimize the regex, but only if PCRE_CONFIG_JIT is available
|
||||||
|
#ifdef PCRE_CONFIG_JIT
|
||||||
|
const char *pcreStudyErrorStr = nullptr;
|
||||||
|
pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
|
||||||
|
// pcre_study() returns NULL for both errors and when it can not optimize the regex.
|
||||||
|
// The last argument is how one checks for errors.
|
||||||
|
// It is NULL if everything works, and points to an error string otherwise.
|
||||||
|
if (pcreStudyErrorStr) {
|
||||||
|
const std::string msg = "pcre_study failed: " + std::string(pcreStudyErrorStr);
|
||||||
|
const ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
|
||||||
|
emptyString,
|
||||||
|
Severity::error,
|
||||||
|
msg,
|
||||||
|
"pcre_study",
|
||||||
|
false);
|
||||||
|
|
||||||
|
reportErr(errmsg);
|
||||||
|
// pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
|
||||||
|
pcre_free(re);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
const pcre_extra * const pcreExtra = nullptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
int ovector[30]= {0};
|
int ovector[30]= {0};
|
||||||
while (pos < (int)str.size() && 0 <= pcre_exec(re, nullptr, str.c_str(), (int)str.size(), pos, 0, ovector, 30)) {
|
while (pos < (int)str.size()) {
|
||||||
|
const int pcreExecRet = pcre_exec(re, pcreExtra, str.c_str(), (int)str.size(), pos, 0, ovector, 30);
|
||||||
|
if (pcreExecRet < 0) {
|
||||||
|
const std::string errorMessage = std::string("pcre_exec failed: ") + pcreErrorCodeToString(pcreExecRet);
|
||||||
|
if (!errorMessage.empty()) {
|
||||||
|
const ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
|
||||||
|
emptyString,
|
||||||
|
Severity::error,
|
||||||
|
errorMessage,
|
||||||
|
"pcre_exec",
|
||||||
|
false);
|
||||||
|
|
||||||
|
reportErr(errmsg);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
const unsigned int pos1 = (unsigned int)ovector[0];
|
const unsigned int pos1 = (unsigned int)ovector[0];
|
||||||
const unsigned int pos2 = (unsigned int)ovector[1];
|
const unsigned int pos2 = (unsigned int)ovector[1];
|
||||||
|
|
||||||
|
@ -674,6 +849,11 @@ void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &token
|
||||||
}
|
}
|
||||||
|
|
||||||
pcre_free(re);
|
pcre_free(re);
|
||||||
|
|
||||||
|
// Free up the EXTRA PCRE value (may be NULL at this point)
|
||||||
|
if (pcreExtra) {
|
||||||
|
pcre_free_study(pcreExtra);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue