Cppcheck - HTML report

#!/usr/bin/env python from __future__ import unicode_literals import io import sys import optparse import os import operator from collections import Counter from pygments import highlight from pygments.lexers import guess_lexer, guess_lexer_for_filename from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module from pygments.util import ClassNotFound from xml.sax import parse as xml_parse from xml.sax import SAXParseException as XmlParseException from xml.sax.handler import ContentHandler as XmlContentHandler from xml.sax.saxutils import escape """ Turns a cppcheck xml file into a browsable html report along with syntax highlighted source code. """ STYLE_FILE = """ body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif; font-size: 13px; line-height: 1.5; margin: 0; width: auto; } h1 { margin: 10px; } .header { border-bottom: thin solid #aaa; } .footer { border-top: thin solid #aaa; font-size: 90%; margin-top: 5px; } .footer ul { list-style-type: none; padding-left: 0; } .footer > p { margin: 4px; } .wrapper { display: -webkit-box; display: -ms-flexbox; display: flex; -webkit-box-pack: justify; -ms-flex-pack: justify; justify-content: space-between; } #menu, #menu_index { text-align: left; width: 350px; height: 90vh; min-height: 200px; overflow: auto; position: -webkit-sticky; position: sticky; top: 0; padding: 0 15px 15px 15px; } #menu > a { display: block; margin-left: 10px; font-size: 12px; z-index: 1; } #content, #content_index { background-color: #fff; -webkit-box-sizing: content-box; -moz-box-sizing: content-box; box-sizing: content-box; padding: 0 15px 15px 15px; width: calc(100% - 350px); height: 100%; overflow-x: auto; } #filename { margin-left: 10px; font-size: 12px; z-index: 1; } .error { background-color: #ffb7b7; } .error2 { background-color: #faa; display: inline-block; margin-left: 4px; } .inconclusive { background-color: #b6b6b4; } .inconclusive2 { background-color: #b6b6b4; display: inline-block; margin-left: 4px; } .verbose { display: inline-block; vertical-align: top; cursor: help; } .verbose .content { display: none; position: absolute; padding: 10px; margin: 4px; max-width: 40%; white-space: pre-wrap; border: 1px solid #000; background-color: #ffffcc; cursor: auto; } .highlight .hll { padding: 1px; } .highlighttable { background-color: #fff; z-index: 10; position: relative; margin: -10px; } .linenos { border-right: thin solid #aaa; color: #d3d3d3; padding-right: 6px; } .d-none { display: none; } """ HTML_HEAD = """ Cppcheck - HTML report - %s

""" HTML_FOOTER = """

""" HTML_ERROR = "<--- %s\n" HTML_INCONCLUSIVE = "<--- %s\n" HTML_EXPANDABLE_ERROR = "

<--- %s [+]

\n""" HTML_EXPANDABLE_INCONCLUSIVE = "

<--- %s [+]

\n""" # escape() and unescape() takes care of &, < and >. html_escape_table = { '"': """, "'": "'" } html_unescape_table = {v: k for k, v in html_escape_table.items()} def html_escape(text): return escape(text, html_escape_table) class AnnotateCodeFormatter(HtmlFormatter): errors = [] def wrap(self, source, outfile): line_no = 1 for i, t in HtmlFormatter.wrap(self, source, outfile): # If this is a source code line we want to add a span tag at the # end. if i == 1: for error in self.errors: if error['line'] == line_no: try: if error['inconclusive'] == 'true': # only print verbose msg if it really differs # from actual message if error.get('verbose') and (error['verbose'] != error['msg']): index = t.rfind('\n') t = t[:index] + HTML_EXPANDABLE_INCONCLUSIVE % (error['msg'], html_escape(error['verbose'].replace("\\012", '\n'))) + t[index + 1:] else: t = t.replace('\n', HTML_INCONCLUSIVE % error['msg']) except KeyError: if error.get('verbose') and (error['verbose'] != error['msg']): index = t.rfind('\n') t = t[:index] + HTML_EXPANDABLE_ERROR % (error['msg'], html_escape(error['verbose'].replace("\\012", '\n'))) + t[index + 1:] else: t = t.replace('\n', HTML_ERROR % error['msg']) line_no = line_no + 1 yield i, t class CppCheckHandler(XmlContentHandler): """Parses the cppcheck xml file and produces a list of all its errors.""" def __init__(self): XmlContentHandler.__init__(self) self.errors = [] self.version = '1' self.versionCppcheck = '' def startElement(self, name, attributes): if name == 'results': self.version = attributes.get('version', self.version) if self.version == '1': self.handleVersion1(name, attributes) else: self.handleVersion2(name, attributes) def handleVersion1(self, name, attributes): if name != 'error': return self.errors.append({ 'file': attributes.get('file', ''), 'line': int(attributes.get('line', 0)), 'locations': [{ 'file': attributes.get('file', ''), 'line': int(attributes.get('line', 0)), }], 'id': attributes['id'], 'severity': attributes['severity'], 'msg': attributes['msg'] }) def handleVersion2(self, name, attributes): if name == 'cppcheck': self.versionCppcheck = attributes['version'] if name == 'error': error = { 'locations': [], 'file': '', 'line': 0, 'id': attributes['id'], 'severity': attributes['severity'], 'msg': attributes['msg'], 'verbose': attributes.get('verbose') } if 'inconclusive' in attributes: error['inconclusive'] = attributes['inconclusive'] if 'cwe' in attributes: error['cwe'] = attributes['cwe'] self.errors.append(error) elif name == 'location': assert self.errors error = self.errors[-1] locations = error['locations'] file = attributes['file'] line = int(attributes['line']) if not locations: error['file'] = file error['line'] = line locations.append({ 'file': file, 'line': line, 'info': attributes.get('info') }) if __name__ == '__main__': # Configure all the options this little utility is using. parser = optparse.OptionParser() parser.add_option('--title', dest='title', help='The title of the project.', default='[project name]') parser.add_option('--file', dest='file', action="append", help='The cppcheck xml output file to read defects ' 'from. You can combine results from several ' 'xml reports i.e. "--file file1.xml --file file2.xml ..". ' 'Default is reading from stdin.') parser.add_option('--report-dir', dest='report_dir', help='The directory where the HTML report content is ' 'written.') parser.add_option('--source-dir', dest='source_dir', help='Base directory where source code files can be ' 'found.') parser.add_option('--source-encoding', dest='source_encoding', help='Encoding of source code.', default='utf-8') # Parse options and make sure that we have an output directory set. options, args = parser.parse_args() try: sys.argv[1] except IndexError: # no arguments give, print --help parser.print_help() quit() if not options.report_dir: parser.error('No report directory set.') # Get the directory where source code files are located. source_dir = os.getcwd() if options.source_dir: source_dir = options.source_dir # Parse the xml from all files defined in file argument # or from stdin. If no input is provided, stdin is used # Produce a simple list of errors. print('Parsing xml report.') try: contentHandler = CppCheckHandler() for fname in options.file or [sys.stdin]: xml_parse(fname, contentHandler) except (XmlParseException, ValueError) as msg: print('Failed to parse cppcheck xml file: %s' % msg) sys.exit(1) # We have a list of errors. But now we want to group them on # each source code file. Lets create a files dictionary that # will contain a list of all the errors in that file. For each # file we will also generate a HTML filename to use. files = {} file_no = 0 for error in contentHandler.errors: filename = error['file'] if filename not in files.keys(): files[filename] = { 'errors': [], 'htmlfile': str(file_no) + '.html'} file_no = file_no + 1 files[filename]['errors'].append(error) # Make sure that the report directory is created if it doesn't exist. print('Creating %s directory' % options.report_dir) if not os.path.exists(options.report_dir): os.makedirs(options.report_dir) # Generate a HTML file with syntax highlighted source code for each # file that contains one or more errors. print('Processing errors') decode_errors = [] for filename, data in sorted(files.items()): htmlfile = data['htmlfile'] errors = [] for error in data['errors']: for location in error['locations']: if filename == location['file']: newError = dict(error) del newError['locations'] newError['line'] = location['line'] if location.get('info'): newError['msg'] = location['info'] newError['severity'] = 'information' del newError['verbose'] errors.append(newError) lines = [] for error in errors: lines.append(error['line']) if filename == '': continue source_filename = os.path.join(source_dir, filename) try: with io.open(source_filename, 'r', encoding=options.source_encoding) as input_file: content = input_file.read() except IOError: if error['id'] == 'unmatchedSuppression': continue # file not found, bail out else: sys.stderr.write("ERROR: Source file '%s' not found.\n" % source_filename) continue except UnicodeDecodeError: sys.stderr.write("WARNING: Unicode decode error in '%s'.\n" % source_filename) decode_errors.append(source_filename[2:]) # "[2:]" gets rid of "./" at beginning continue htmlFormatter = AnnotateCodeFormatter(linenos=True, style='colorful', hl_lines=lines, lineanchors='line', encoding=options.source_encoding) htmlFormatter.errors = errors with io.open(os.path.join(options.report_dir, htmlfile), 'w', encoding='utf-8') as output_file: output_file.write(HTML_HEAD % (options.title, htmlFormatter.get_style_defs('.highlight'), options.title, filename, filename.split('/')[-1])) for error in sorted(errors, key=lambda k: k['line']): output_file.write(" %s %s" % (data['htmlfile'], error['line'], error['id'], error['line'])) output_file.write(HTML_HEAD_END) try: lexer = guess_lexer_for_filename(source_filename, '', stripnl=False) except ClassNotFound: try: lexer = guess_lexer(content, stripnl=False) except ClassNotFound: sys.stderr.write("ERROR: Couldn't determine lexer for the file' " + source_filename + " '. Won't be able to syntax highlight this file.") output_file.write("\n Could not generate content because pygments failed to determine the code type.") output_file.write("\n Sorry about this.") continue if options.source_encoding: lexer.encoding = options.source_encoding output_file.write( highlight(content, lexer, htmlFormatter).decode( options.source_encoding)) output_file.write(HTML_FOOTER % contentHandler.versionCppcheck) print(' ' + filename) # Generate a master index.html file that will contain a list of # all the errors created. print('Creating index.html') with io.open(os.path.join(options.report_dir, 'index.html'), 'w') as output_file: stats_count = 0 stats = [] for filename, data in sorted(files.items()): for error in data['errors']: stats.append(error['id']) # get the stats stats_count += 1 counter = Counter(stats) stat_html = [] # the following lines sort the stat primary by value (occurrences), # but if two IDs occur equally often, then we sort them alphabetically by warning ID try: cnt_max = counter.most_common()[0][1] except IndexError: cnt_max = 0 try: cnt_min = counter.most_common()[-1][1] except IndexError: cnt_min = 0 stat_fmt = "\n {}{}" for occurrences in reversed(range(cnt_min, cnt_max + 1)): for _id in [k for k, v in sorted(counter.items()) if v == occurrences]: stat_html.append(stat_fmt.format(_id, _id, dict(counter.most_common())[_id], _id)) output_file.write(HTML_HEAD.replace('id="menu"', 'id="menu_index"', 1).replace("Defects:", "Defect summary;", 1) % (options.title, '', options.title, '', '')) output_file.write('\n Toggle all') output_file.write('\n ') output_file.write('\n ') output_file.write(''.join(stat_html)) output_file.write('\n ') output_file.write('\n

Show	#	Defect ID
	' + str(stats_count) + '	total

') output_file.write('\n

Statistics

') output_file.write(HTML_HEAD_END.replace("content", "content_index", 1)) output_file.write('\n ') output_file.write('\n ') for filename, data in sorted(files.items()): if filename in decode_errors: # don't print a link but a note output_file.write("\n " % filename) output_file.write("\n ") else: if filename.endswith('*'): # assume unmatched suppression output_file.write( "\n " % filename) else: output_file.write( "\n " % (data['htmlfile'], filename)) for error in sorted(data['errors'], key=lambda k: k['line']): error_class = '' try: if error['inconclusive'] == 'true': error_class = 'class="inconclusive"' error['severity'] += ", inconcl." except KeyError: pass try: if error['cwe']: cwe_url = "" + error['cwe'] + "" except KeyError: cwe_url = "" if error['severity'] == 'error': error_class = 'class="error"' if error['id'] == 'missingInclude': output_file.write( '\n ' % (error['id'], error['id'], error['severity'], html_escape(error['msg']))) elif (error['id'] == 'unmatchedSuppression') and filename.endswith('*'): output_file.write( '\n ' % (error['id'], error['id'], error['severity'], error_class, html_escape(error['msg']))) else: output_file.write( '\n ' % (error['id'], data['htmlfile'], error['line'], error['line'], error['id'], cwe_url, error['severity'], error_class, html_escape(error['msg']))) output_file.write('\n

Line	Id	CWE	Severity	Message
%s
Could not generated due to UnicodeDecodeError
%s
%s
	%s		%s	%s
	%s		%s	%s
%d	%s	%s	%s	%s

') output_file.write(HTML_FOOTER % contentHandler.versionCppcheck) if decode_errors: sys.stderr.write("\nGenerating html failed for the following files: " + ' '.join(decode_errors)) sys.stderr.write("\nConsider changing source-encoding (for example: \"htmlreport ... --source-encoding=\"iso8859-1\"\"\n") print('Creating style.css file') with io.open(os.path.join(options.report_dir, 'style.css'), 'w') as css_file: css_file.write(STYLE_FILE) print("Creating stats.html (statistics)\n") stats_countlist = {} for filename, data in sorted(files.items()): if filename == '': continue stats_tmplist = [] for error in sorted(data['errors'], key=lambda k: k['line']): stats_tmplist.append(error['severity']) stats_countlist[filename] = dict(Counter(stats_tmplist)) # get top ten for each severity SEVERITIES = "error", "warning", "portability", "performance", "style", "unusedFunction", "information", "missingInclude", "internal" with io.open(os.path.join(options.report_dir, 'stats.html'), 'w') as stats_file: stats_file.write(HTML_HEAD.replace('id="menu"', 'id="menu_index"', 1).replace("Defects:", "Back to summary", 1) % (options.title, '', options.title, 'Statistics', '')) stats_file.write(HTML_HEAD_END.replace("content", "content_index", 1)) for sev in SEVERITIES: _sum = 0 stats_templist = {} # if the we have an style warning but we are checking for # portability, we have to skip it to prevent KeyError try: for filename in stats_countlist: try: # also bail out if we have a file with no sev-results _sum += stats_countlist[filename][sev] stats_templist[filename] = int(stats_countlist[filename][sev]) # file : amount, except KeyError: continue # don't print "0 style" etc, if no style warnings were found if _sum == 0: continue except KeyError: continue stats_file.write("

Top 10 files for " + sev + " severity, total findings: " + str(_sum) + "
\n") # sort, so that the file with the most severities per type is first stats_list_sorted = sorted(stats_templist.items(), key=operator.itemgetter(1, 0), reverse=True) it = 0 LENGTH = 0 for i in stats_list_sorted: # printing loop # for aesthetics: if it's the first iteration of the loop, get # the max length of the number string if it == 0: LENGTH = len(str(i[1])) # <- length of longest number, now get the difference and try to make other numbers align to it stats_file.write(" " * 3 + str(i[1]) + " " * (1 + LENGTH - len(str(i[1]))) + " " + i[0] + "
\n") it += 1 if it == 10: # print only the top 10 break stats_file.write("

\n") stats_file.write(HTML_FOOTER % contentHandler.versionCppcheck) print("\nOpen '" + options.report_dir + "/index.html' to see the results.")

Cppcheck report - %s: %s