Merge branch 'sarifOutput'

My SINCERE THANKS to yongyan-gh for the hard work to integrate
SARIF output functionality into flawfinder!!

Signed-off-by: David A. Wheeler <dwheeler@dwheeler.com>
This commit is contained in:
David A. Wheeler 2021-05-30 14:41:39 -04:00
commit 3bc5f16c4c
1 changed files with 184 additions and 4 deletions

186
flawfinder.py Normal file → Executable file
View File

@ -53,7 +53,7 @@ import operator # To support filename expansion on Windows
import time
import csv # To support generating CSV format
import hashlib
# import formatter
import json
version = "2.0.15"
@ -87,6 +87,7 @@ output_format = 0 # 0 = normal, 1 = html.
single_line = 0 # 1 = singleline (can't be 0 if html)
csv_output = 0 # 1 = Generate CSV
csv_writer = None # Replaced by a csv.writer on sys.stdout in process_options()
sarif_output = 0 # 1 = Generate SARIF report
omit_time = 0 # 1 = omit time-to-run (needed for testing)
required_regex = None # If non-None, regex that must be met to report
required_regex_compiled = None # Presumably the compiled form of required_regex (set elsewhere)
@ -125,6 +126,173 @@ def print_warning(message):
sys.stderr.write("\n")
sys.stderr.flush()
def to_json(o):
    """Serialize *o* to a JSON string indented by two spaces.

    Values the json module cannot encode natively are replaced by their
    instance ``__dict__``.  Keys are emitted in the order the mappings
    provide them (no sorting).
    """
    def _attributes_of(value):
        # Fallback encoder: represent an arbitrary object by its attributes.
        return value.__dict__

    return json.dumps(o, indent=2, sort_keys=False, default=_attributes_of)
# The following implements the SarifLogger.
# We intentionally merge all of flawfinder's functionality into 1 file
# so it's trivial to copy & use elsewhere.
class SarifLogger(object):
    """Build a SARIF 2.1.0 report (as a JSON string) from a list of hits.

    Each hit is expected to provide the attributes read below: ``ruleid``,
    ``category``, ``name``, ``warning``, ``level``, ``defaultlevel``,
    ``filename``, ``line``, ``column``, ``context_text`` and the methods
    ``helpuri()``, ``cwes()`` and ``fingerprint()``.
    """

    _hitlist = None
    TOOL_NAME = "Flawfinder"
    TOOL_URL = "https://dwheeler.com/flawfinder/"
    # NOTE(review): duplicates the module-level `version`; keep them in sync.
    TOOL_VERSION = "2.0.15"
    URI_BASE_ID = "SRCROOT"
    SARIF_SCHEMA = "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json"
    SARIF_SCHEMA_VERSION = "2.1.0"
    CWE_TAXONOMY_NAME = "CWE"
    CWE_TAXONOMY_URI = "https://raw.githubusercontent.com/sarif-standard/taxonomies/main/CWE_v4.4.sarif"
    CWE_TAXONOMY_GUID = "FFC64C90-42B6-44CE-8BEB-F6B7DAE649E5"

    def __init__(self, hits):
        """Remember the list of hits that output_sarif() will report."""
        self._hitlist = hits

    def output_sarif(self):
        """Return the complete SARIF log for the stored hits as a JSON string."""
        tool = {
            "driver": {
                "name": self.TOOL_NAME,
                "version": self.TOOL_VERSION,
                "informationUri": self.TOOL_URL,
                "rules": self._extract_rules(self._hitlist),
                "supportedTaxonomies": [{
                    "name": self.CWE_TAXONOMY_NAME,
                    "guid": self.CWE_TAXONOMY_GUID,
                }],
            }
        }
        runs = [{
            "tool": tool,
            "columnKind": "utf16CodeUnits",
            "results": self._extract_results(self._hitlist),
            "externalPropertyFileReferences": {
                "taxonomies": [{
                    "location": {
                        "uri": self.CWE_TAXONOMY_URI,
                    },
                    "guid": self.CWE_TAXONOMY_GUID,
                }],
            },
        }]
        report = {
            "$schema": self.SARIF_SCHEMA,
            "version": self.SARIF_SCHEMA_VERSION,
            "runs": runs,
        }
        return to_json(report)

    def _extract_rules(self, hitlist):
        """Return one SARIF rule per distinct ruleid, in first-seen order.

        The rule is built from the first hit carrying that ruleid.
        """
        rules = []
        seen = set()
        for hit in hitlist:
            if hit.ruleid not in seen:
                seen.add(hit.ruleid)
                rules.append(self._to_sarif_rule(hit))
        return rules

    def _extract_results(self, hitlist):
        """Return one SARIF result object per hit, in hitlist order."""
        return [self._to_sarif_result(hit) for hit in hitlist]

    def _to_sarif_rule(self, hit):
        """Build the SARIF rule (reportingDescriptor) describing hit's rule."""
        return {
            "id": hit.ruleid,
            "name": "{0}/{1}".format(hit.category, hit.name),
            "shortDescription": {
                "text": self._append_period(hit.warning),
            },
            "defaultConfiguration": {
                "level": self._to_sarif_level(hit.defaultlevel),
            },
            "helpUri": hit.helpuri(),
            "relationships": self._extract_relationships(hit.cwes()),
        }

    def _to_sarif_result(self, hit):
        """Build the SARIF result object for a single hit."""
        return {
            "ruleId": hit.ruleid,
            "level": self._to_sarif_level(hit.level),
            "message": {
                "text": self._append_period("{0}/{1}:{2}".format(hit.category, hit.name, hit.warning)),
            },
            "locations": [{
                "physicalLocation": {
                    "artifactLocation": {
                        "uri": self._to_uri_path(hit.filename),
                        "uriBaseId": self.URI_BASE_ID,
                    },
                    "region": {
                        "startLine": hit.line,
                        "startColumn": hit.column,
                        # The region extends to one past the end of the
                        # context text.
                        "endColumn": len(hit.context_text) + 1,
                        "snippet": {
                            "text": hit.context_text,
                        }
                    }
                }
            }],
            "fingerprints": {
                "contextHash/v1": hit.fingerprint()
            },
            "rank": self._to_sarif_rank(hit.level),
        }

    def _extract_relationships(self, cwestring):
        """Turn a CWE string into a list of SARIF relationship objects.

        Example inputs: "CWE-119!/ CWE-120", "CWE-829, CWE-20".  Entries are
        separated by ',' or '/'; an entry ending in '!' is reported with kind
        "incomparable", every other entry with kind "relevant".  An empty or
        None cwestring yields an empty list.
        """
        relationships = []
        for cwe in re.split(',|/', cwestring or ""):
            cwestr = cwe.strip()
            if not cwestr:
                continue
            relationships.append({
                "target": {
                    # SARIF defines this id as a string, so the CWE number is
                    # emitted as text rather than as a JSON integer.
                    # NOTE(review): confirm the id form the referenced CWE
                    # taxonomy file uses ("120" vs "CWE-120").
                    "id": cwestr.replace("CWE-", "").replace("!", "").strip(),
                    "toolComponent": {
                        "name": self.CWE_TAXONOMY_NAME,
                        "guid": self.CWE_TAXONOMY_GUID,
                    },
                },
                "kinds": [
                    "incomparable" if cwestr.endswith("!") else "relevant"
                ],
            })
        return relationships

    @staticmethod
    def _to_sarif_level(level):
        """Map a flawfinder level to a SARIF level string."""
        # level 4 & 5
        if level >= 4:
            return "error"
        # level 3
        if level == 3:
            return "warning"
        # level 0 1 2
        return "note"

    @staticmethod
    def _to_sarif_rank(level):
        """Map a flawfinder level (0..5) to a rank in steps of 0.2."""
        #SARIF rank  FF Level  SARIF level  Default Viewer Action
        #0.0         0         note         Does not display by default
        #0.2         1         note         Does not display by default
        #0.4         2         note         Does not display by default
        #0.6         3         warning      Displays by default, does not break build / other processes
        #0.8         4         error        Displays by default, breaks build/ other processes
        #1.0         5         error        Displays by default, breaks build/ other processes
        # Divide instead of multiplying by 0.2: 3 * 0.2 evaluates to
        # 0.6000000000000001, which would leak into the JSON output.
        # NOTE(review): the SARIF 2.1.0 spec describes rank on a 0.0-100.0
        # scale; confirm that the 0.0-1.0 scale used here is intended.
        return level / 5.0

    @staticmethod
    def _to_uri_path(path):
        """Return path with backslashes replaced by forward slashes."""
        return path.replace("\\", "/")

    @staticmethod
    def _append_period(text):
        """Return text ending in '.'; empty text is returned unchanged."""
        if not text or text.endswith("."):
            return text
        return text + "."
# The following code accepts unified diff format from both subversion (svn)
# and GNU diff, which aren't well-documented. It gets filenames from
@ -437,6 +605,8 @@ class Hit(object):
if csv_output:
self.show_csv()
return
if sarif_output:
return
if output_format:
print("<li>", end='')
sys.stdout.write(h(self.filename))
@ -1795,6 +1965,8 @@ def display_header():
'Suggestion', 'Note', 'CWEs', 'Context', 'Fingerprint', 'ToolVersion', 'RuleId', 'HelpUri'
])
return
if sarif_output:
return
if not showheading:
return
if not displayed_header:
@ -2017,6 +2189,7 @@ flawfinder [--help | -h] [--version] [--listrules]
Display as HTML output.
--immediate | -i
Immediately display hits (don't just wait until the end).
--sarif Generate output in SARIF format.
--singleline | -S
Single-line output.
--omittime Omit time to run.
@ -2049,7 +2222,7 @@ flawfinder [--help | -h] [--version] [--listrules]
def process_options():
global show_context, show_inputs, allowlink, skipdotdir, omit_time
global output_format, minimum_level, show_immediately, single_line
global csv_output, csv_writer
global csv_output, csv_writer, sarif_output
global error_level
global required_regex, required_regex_compiled
global falsepositive
@ -2063,7 +2236,7 @@ def process_options():
"falsepositive", "falsepositives", "columns", "listrules",
"omittime", "allowlink", "patch=", "followdotdir", "neverignore",
"regex=", "quiet", "dataonly", "html", "singleline", "csv",
"error-level=",
"error-level=", "sarif",
"loadhitlist=", "savehitlist=", "diffhitlist=", "version", "help"
])
for (opt, value) in optlist:
@ -2102,6 +2275,10 @@ def process_options():
quiet = 1
showheading = 0
csv_writer = csv.writer(sys.stdout)
elif opt == "--sarif":
sarif_output = 1
quiet = 1
showheading = 0
elif opt == "--error-level":
error_level = int(value)
elif opt == "--immediate" or opt == "-i":
@ -2354,6 +2531,9 @@ def flawfind():
display_header()
initialize_ruleset()
if process_files():
if sarif_output:
print(SarifLogger(hitlist).output_sarif())
else:
show_final_results()
save_if_desired()
return 1 if error_level_exceeded else 0