diff --git a/lib/cppcheck.cpp b/lib/cppcheck.cpp
index 6ed8116cf..53e547fad 100644
--- a/lib/cppcheck.cpp
+++ b/lib/cppcheck.cpp
@@ -590,6 +590,148 @@ void CppCheck::checkSimplifiedTokens(const Tokenizer &tokenizer)
     executeRules("simple", tokenizer);
 }
 
+#ifdef HAVE_RULES
+
+/**
+ * @brief Describe a negative return value of pcre_exec().
+ * @param pcreExecRet value returned by pcre_exec()
+ * @return Static description of the error, or an empty string if the code has
+ *         no case in the switch below. PCRE_ERROR_NOMATCH has no case, so a
+ *         plain "pattern did not match" result yields an empty string and
+ *         callers must treat an empty result as "nothing to report".
+ */
+static const char * pcreErrorCodeToString(const int pcreExecRet)
+{
+    switch (pcreExecRet) {
+    case PCRE_ERROR_NULL:
+        return "Either code or subject was passed as NULL, or ovector was NULL "
+               "and ovecsize was not zero (PCRE_ERROR_NULL)";
+    case PCRE_ERROR_BADOPTION:
+        return "An unrecognized bit was set in the options argument (PCRE_ERROR_BADOPTION)";
+    case PCRE_ERROR_BADMAGIC:
+        return "PCRE stores a 4-byte \"magic number\" at the start of the compiled code, "
+               "to catch the case when it is passed a junk pointer and to detect when a "
+               "pattern that was compiled in an environment of one endianness is run in "
+               "an environment with the other endianness. This is the error that PCRE "
+               "gives when the magic number is not present (PCRE_ERROR_BADMAGIC)";
+    case PCRE_ERROR_UNKNOWN_NODE:
+        return "While running the pattern match, an unknown item was encountered in the "
+               "compiled pattern. This error could be caused by a bug in PCRE or by "
+               "overwriting of the compiled pattern (PCRE_ERROR_UNKNOWN_NODE)";
+    case PCRE_ERROR_NOMEMORY:
+        return "If a pattern contains back references, but the ovector that is passed "
+               "to pcre_exec() is not big enough to remember the referenced substrings, "
+               "PCRE gets a block of memory at the start of matching to use for this purpose. "
+               "If the call via pcre_malloc() fails, this error is given. The memory is "
+               "automatically freed at the end of matching. This error is also given if "
+               "pcre_stack_malloc() fails in pcre_exec(). "
+               "This can happen only when PCRE has been compiled with "
+               "--disable-stack-for-recursion (PCRE_ERROR_NOMEMORY)";
+    case PCRE_ERROR_NOSUBSTRING:
+        return "This error is used by the pcre_copy_substring(), pcre_get_substring(), "
+               "and pcre_get_substring_list() functions (see below). "
+               "It is never returned by pcre_exec() (PCRE_ERROR_NOSUBSTRING)";
+    case PCRE_ERROR_MATCHLIMIT:
+        return "The backtracking limit, as specified by the match_limit field in a pcre_extra "
+               "structure (or defaulted) was reached. "
+               "See the description above (PCRE_ERROR_MATCHLIMIT)";
+    case PCRE_ERROR_CALLOUT:
+        return "This error is never generated by pcre_exec() itself. "
+               "It is provided for use by callout functions that want to yield a distinctive "
+               "error code. See the pcrecallout documentation for details (PCRE_ERROR_CALLOUT)";
+    case PCRE_ERROR_BADUTF8:
+        return "A string that contains an invalid UTF-8 byte sequence was passed as a subject, "
+               "and the PCRE_NO_UTF8_CHECK option was not set. If the size of the output vector "
+               "(ovecsize) is at least 2, the byte offset to the start of the the invalid UTF-8 "
+               "character is placed in the first element, and a reason code is placed in the "
+               "second element. The reason codes are listed in the following section. For "
+               "backward compatibility, if PCRE_PARTIAL_HARD is set and the problem is a truncated "
+               "UTF-8 character at the end of the subject (reason codes 1 to 5), "
+               "PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8";
+    case PCRE_ERROR_BADUTF8_OFFSET:
+        return "The UTF-8 byte sequence that was passed as a subject was checked and found to "
+               "be valid (the PCRE_NO_UTF8_CHECK option was not set), but the value of "
+               "startoffset did not point to the beginning of a UTF-8 character or the end of "
+               "the subject (PCRE_ERROR_BADUTF8_OFFSET)";
+    case PCRE_ERROR_PARTIAL:
+        return "The subject string did not match, but it did match partially. See the "
+               "pcrepartial documentation for details of partial matching (PCRE_ERROR_PARTIAL)";
+    case PCRE_ERROR_BADPARTIAL:
+        return "This code is no longer in use. It was formerly returned when the PCRE_PARTIAL "
+               "option was used with a compiled pattern containing items that were not supported "
+               "for partial matching. From release 8.00 onwards, there are no restrictions on "
+               "partial matching (PCRE_ERROR_BADPARTIAL)";
+    case PCRE_ERROR_INTERNAL:
+        return "An unexpected internal error has occurred. This error could be caused by a bug "
+               "in PCRE or by overwriting of the compiled pattern (PCRE_ERROR_INTERNAL)";
+    case PCRE_ERROR_BADCOUNT:
+        return "This error is given if the value of the ovecsize argument is negative "
+               "(PCRE_ERROR_BADCOUNT)";
+    case PCRE_ERROR_RECURSIONLIMIT:
+        return "The internal recursion limit, as specified by the match_limit_recursion "
+               "field in a pcre_extra structure (or defaulted) was reached. "
+               "See the description above (PCRE_ERROR_RECURSIONLIMIT)";
+    case PCRE_ERROR_DFA_UITEM:
+        return "PCRE_ERROR_DFA_UITEM";
+    case PCRE_ERROR_DFA_UCOND:
+        return "PCRE_ERROR_DFA_UCOND";
+    case PCRE_ERROR_DFA_WSSIZE:
+        return "PCRE_ERROR_DFA_WSSIZE";
+    case PCRE_ERROR_DFA_RECURSE:
+        return "PCRE_ERROR_DFA_RECURSE";
+    case PCRE_ERROR_NULLWSLIMIT:
+        return "PCRE_ERROR_NULLWSLIMIT";
+    case PCRE_ERROR_BADNEWLINE:
+        return "An invalid combination of PCRE_NEWLINE_xxx options was "
+               "given (PCRE_ERROR_BADNEWLINE)";
+    case PCRE_ERROR_BADOFFSET:
+        return "The value of startoffset was negative or greater than the length "
+               "of the subject, that is, the value in length (PCRE_ERROR_BADOFFSET)";
+    case PCRE_ERROR_SHORTUTF8:
+        return "This error is returned instead of PCRE_ERROR_BADUTF8 when the subject "
+               "string ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD option is set. "
+               "Information about the failure is returned as for PCRE_ERROR_BADUTF8. "
+               "It is in fact sufficient to detect this case, but this special error code for "
+               "PCRE_PARTIAL_HARD precedes the implementation of returned information; "
+               "it is retained for backwards compatibility (PCRE_ERROR_SHORTUTF8)";
+    case PCRE_ERROR_RECURSELOOP:
+        return "This error is returned when pcre_exec() detects a recursion loop "
+               "within the pattern. Specifically, it means that either the whole pattern "
+               "or a subpattern has been called recursively for the second time at the same "
+               "position in the subject string. Some simple patterns that might do this "
+               "are detected and faulted at compile time, but more complicated cases, "
+               "in particular mutual recursions between two different subpatterns, "
+               "cannot be detected until run time (PCRE_ERROR_RECURSELOOP)";
+    case PCRE_ERROR_JIT_STACKLIMIT:
+        return "This error is returned when a pattern that was successfully studied "
+               "using a JIT compile option is being matched, but the memory available "
+               "for the just-in-time processing stack is not large enough. See the pcrejit "
+               "documentation for more details (PCRE_ERROR_JIT_STACKLIMIT)";
+    case PCRE_ERROR_BADMODE:
+        return "This error is given if a pattern that was compiled by the 8-bit library "
+               "is passed to a 16-bit or 32-bit library function, or vice versa (PCRE_ERROR_BADMODE)";
+    case PCRE_ERROR_BADENDIANNESS:
+        return "This error is given if a pattern that was compiled and saved is reloaded on a "
+               "host with different endianness. The utility function pcre_pattern_to_host_byte_order() "
+               "can be used to convert such a pattern so that it runs on the new host (PCRE_ERROR_BADENDIANNESS)";
+    case PCRE_ERROR_DFA_BADRESTART:
+        return "PCRE_ERROR_DFA_BADRESTART";
+#if (PCRE_MAJOR > 8) || (PCRE_MAJOR == 8 && PCRE_MINOR >= 32)
+    case PCRE_ERROR_BADLENGTH:
+        return "This error is given if pcre_exec() is called with a negative value for the length argument (PCRE_ERROR_BADLENGTH)";
+    case PCRE_ERROR_JIT_BADOPTION:
+        return "This error is returned when a pattern that was successfully studied using a JIT compile "
+               "option is being matched, but the matching mode (partial or complete match) does not correspond "
+               "to any JIT compilation mode. When the JIT fast path function is used, this error may be "
+               "also given for invalid options. See the pcrejit documentation for more details (PCRE_ERROR_JIT_BADOPTION)";
+#endif
+    }
+    return "";
+}
+
+#endif // HAVE_RULES
+
+
 void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &tokenizer)
 {
     (void)tokenlist;
@@ -618,26 +760,69 @@ void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &token
         if (rule.pattern.empty() || rule.id.empty() || rule.severity == Severity::none || rule.tokenlist != tokenlist)
             continue;
 
-        const char *error = nullptr;
+        const char *pcreCompileErrorStr = nullptr;
         int erroffset = 0;
-        pcre *re = pcre_compile(rule.pattern.c_str(),0,&error,&erroffset,nullptr);
+        pcre * const re = pcre_compile(rule.pattern.c_str(),0,&pcreCompileErrorStr,&erroffset,nullptr);
         if (!re) {
-            if (error) {
-                ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
-                                                 emptyString,
-                                                 Severity::error,
-                                                 error,
-                                                 "pcre_compile",
-                                                 false);
+            if (pcreCompileErrorStr) {
+                const std::string msg = "pcre_compile failed: " + std::string(pcreCompileErrorStr);
+                const ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
+                                                       emptyString,
+                                                       Severity::error,
+                                                       msg,
+                                                       "pcre_compile",
+                                                       false);
 
                 reportErr(errmsg);
             }
             continue;
         }
 
+        // Optimize the regex, but only if PCRE_CONFIG_JIT is available
+#ifdef PCRE_CONFIG_JIT
+        const char *pcreStudyErrorStr = nullptr;
+        pcre_extra * const pcreExtra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &pcreStudyErrorStr);
+        // pcre_study() returns NULL for both errors and when it can not optimize the regex.
+        // The last argument is how one checks for errors.
+        // It is NULL if everything works, and points to an error string otherwise.
+        if (pcreStudyErrorStr) {
+            const std::string msg = "pcre_study failed: " + std::string(pcreStudyErrorStr);
+            const ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
+                                                   emptyString,
+                                                   Severity::error,
+                                                   msg,
+                                                   "pcre_study",
+                                                   false);
+
+            reportErr(errmsg);
+            // pcre_compile() worked, but pcre_study() returned an error. Free the resources allocated by pcre_compile().
+            pcre_free(re);
+            continue;
+        }
+#else
+        const pcre_extra * const pcreExtra = nullptr;
+#endif
+
         int pos = 0;
         int ovector[30]= {0};
-        while (pos < (int)str.size() && 0 <= pcre_exec(re, nullptr, str.c_str(), (int)str.size(), pos, 0, ovector, 30)) {
+        while (pos < (int)str.size()) {
+            const int pcreExecRet = pcre_exec(re, pcreExtra, str.c_str(), (int)str.size(), pos, 0, ovector, 30);
+            if (pcreExecRet < 0) {
+                // An empty description means there is nothing to report (e.g. PCRE_ERROR_NOMATCH),
+                // so the "pcre_exec failed: " prefix is only added once the description is known to be non-empty.
+                const std::string errorMessage = pcreErrorCodeToString(pcreExecRet);
+                if (!errorMessage.empty()) {
+                    const ErrorLogger::ErrorMessage errmsg(std::list<ErrorLogger::ErrorMessage::FileLocation>(),
+                                                           emptyString,
+                                                           Severity::error,
+                                                           std::string("pcre_exec failed: ") + errorMessage,
+                                                           "pcre_exec",
+                                                           false);
+
+                    reportErr(errmsg);
+                }
+                break;
+            }
             const unsigned int pos1 = (unsigned int)ovector[0];
             const unsigned int pos2 = (unsigned int)ovector[1];
 
@@ -674,6 +859,14 @@ void CppCheck::executeRules(const std::string &tokenlist, const Tokenizer &token
         }
 
         pcre_free(re);
+
+#ifdef PCRE_CONFIG_JIT
+        // pcreExtra is only a pcre_study() result in the PCRE_CONFIG_JIT branch; otherwise it is a const null pointer.
+        // Free up the EXTRA PCRE value (may be NULL at this point)
+        if (pcreExtra) {
+            pcre_free_study(pcreExtra);
+        }
+#endif
     }
 #endif
 }