Merge pull request #6 from elfring/construct_string_literals_without_using_plus_operators

Syntax changes thanks to elfring that do affect bytecode size; Construct string literals without using plus operators
This commit is contained in:
Jon Hood 2019-02-26 10:27:04 -06:00 committed by GitHub
commit 1e2e6f590f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 64 additions and 65 deletions

View File

@ -184,9 +184,9 @@ diff_line_del = re.compile(r'^-[^-].*')
# Also, "newfile" can have " (comment)" postpended. Find and eliminate this. # Also, "newfile" can have " (comment)" postpended. Find and eliminate this.
# Note that the expression below is Y10K (and Y100K) ready. :-). # Note that the expression below is Y10K (and Y100K) ready. :-).
diff_findjunk = re.compile( diff_findjunk = re.compile(
r'^(?P<filename>.*)(' + r'^(?P<filename>.*)('
r'(\s\d\d\d\d+-\d\d-\d\d\s+\d\d:\d[0-9:.]+Z?(\s+[\-\+0-9A-Z]+)?)|' + r'(\s\d\d\d\d+-\d\d-\d\d\s+\d\d:\d[0-9:.]+Z?(\s+[\-\+0-9A-Z]+)?)|'
r'(\s[A-Za-z][a-z]+\s[A-za-z][a-z]+\s\d+\s\d+:\d[0-9:.]+Z?' + r'(\s[A-Za-z][a-z]+\s[A-za-z][a-z]+\s\d+\s\d+:\d[0-9:.]+Z?'
r'(\s[\-\+0-9]*)?\s\d\d\d\d+)|' r'(\s[\-\+0-9]*)?\s\d\d\d\d+)|'
r'(\s\(.*\)))\s*$' r'(\s\(.*\)))\s*$'
) )
@ -273,7 +273,7 @@ def load_patch_info(input_patch_file):
if hunk_match: if hunk_match:
if patched_filename == "": if patched_filename == "":
error( error(
"wrong type of patch file : " + "wrong type of patch file : "
"we have a line number without having seen a filename" "we have a line number without having seen a filename"
) )
sys.exit(13) sys.exit(13)
@ -577,8 +577,8 @@ def extract_c_parameters(text, pos=0):
# In practice, this doesn't seem to be a problem; gettext() is usually # In practice, this doesn't seem to be a problem; gettext() is usually
# wrapped around the entire parameter. # wrapped around the entire parameter.
# The ?s makes it posible to match multi-line strings. # The ?s makes it posible to match multi-line strings.
gettext_pattern = re.compile(r'(?s)^\s*' + 'gettext' + r'\s*\((.*)\)\s*$') gettext_pattern = re.compile(r'(?s)^\s*' 'gettext' r'\s*\((.*)\)\s*$')
undersc_pattern = re.compile(r'(?s)^\s*' + '_(T(EXT)?)?' + r'\s*\((.*)\)\s*$') undersc_pattern = re.compile(r'(?s)^\s*' '_(T(EXT)?)?' r'\s*\((.*)\)\s*$')
def strip_i18n(text): def strip_i18n(text):
@ -642,7 +642,7 @@ def c_buffer(hit):
add_warning(hit) add_warning(hit)
p_dangerous_strncat = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' + p_dangerous_strncat = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+'
r'\s*(\)\s*)?(-\s*1\s*)?$') r'\s*(\)\s*)?(-\s*1\s*)?$')
# This is a heuristic: constants in C are usually given in all # This is a heuristic: constants in C are usually given in all
# upper case letters. Yes, this need not be true, but it's true often # upper case letters. Yes, this need not be true, but it's true often
@ -680,7 +680,7 @@ def c_strncat(hit):
hit.level = 5 hit.level = 5
hit.note = ( hit.note = (
"Risk is high; the length parameter appears to be a constant, " "Risk is high; the length parameter appears to be a constant, "
+ "instead of computing the number of characters left.") "instead of computing the number of characters left.")
add_warning(hit) add_warning(hit)
return return
c_buffer(hit) c_buffer(hit)
@ -758,9 +758,9 @@ def c_scanf(hit):
elif p_low_risk_scanf_format.search(source): elif p_low_risk_scanf_format.search(source):
# This is often okay, but sometimes extremely serious. # This is often okay, but sometimes extremely serious.
hit.level = 1 hit.level = 1
hit.warning = ("It's unclear if the %s limit in the " + hit.warning = ("It's unclear if the %s limit in the "
"format string is small enough (CWE-120)") "format string is small enough (CWE-120)")
hit.suggestion = ("Check that the limit is sufficiently " + hit.suggestion = ("Check that the limit is sufficiently "
"small, or use a different input function") "small, or use a different input function")
else: else:
# No risky scanf request. # No risky scanf request.
@ -770,16 +770,16 @@ def c_scanf(hit):
hit.note = "No risky scanf format detected." hit.note = "No risky scanf format detected."
else: else:
# Format isn't a constant. # Format isn't a constant.
hit.note = ("If the scanf format is influenceable " + hit.note = ("If the scanf format is influenceable "
"by an attacker, it's exploitable.") "by an attacker, it's exploitable.")
add_warning(hit) add_warning(hit)
p_dangerous_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+' + p_dangerous_multi_byte = re.compile(r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+'
r'\s*(\)\s*)?(-\s*1\s*)?$') r'\s*(\)\s*)?(-\s*1\s*)?$')
p_safe_multi_byte = re.compile( p_safe_multi_byte = re.compile(
r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+\s*(\)\s*)?' + r'^\s*sizeof\s*(\(\s*)?[A-Za-z_$0-9]+\s*(\)\s*)?'
r'/\s*sizeof\s*\(\s*?[A-Za-z_$0-9]+\s*' + r'\[\s*0\s*\]\)\s*(-\s*1\s*)?$') r'/\s*sizeof\s*\(\s*?[A-Za-z_$0-9]+\s*\[\s*0\s*\]\)\s*(-\s*1\s*)?$')
def c_multi_byte_to_wide_char(hit): def c_multi_byte_to_wide_char(hit):
@ -792,7 +792,7 @@ def c_multi_byte_to_wide_char(hit):
hit.level = 5 hit.level = 5
hit.note = ( hit.note = (
"Risk is high, it appears that the size is given as bytes, but the " "Risk is high, it appears that the size is given as bytes, but the "
+ "function requires size as characters.") "function requires size as characters.")
elif p_safe_multi_byte.search(num_chars_to_copy): elif p_safe_multi_byte.search(num_chars_to_copy):
# This isn't really risk-free, since it might not be the destination, # This isn't really risk-free, since it might not be the destination,
# or the destination might be a character array (if it's a char pointer, # or the destination might be a character array (if it's a char pointer,
@ -897,7 +897,7 @@ c_ruleset = {
1, # Low risk level, because this is often used correctly when FIXING security 1, # Low risk level, because this is often used correctly when FIXING security
# problems, and raising it to a higher risk level would cause many false # problems, and raising it to a higher risk level would cause many false
# positives. # positives.
"Easily used incorrectly; doesn't always \\0-terminate or " + "Easily used incorrectly; doesn't always \\0-terminate or "
"check for invalid pointers [MS-banned] (CWE-120)", "check for invalid pointers [MS-banned] (CWE-120)",
"", "",
"buffer", "", {}), "buffer", "", {}),
@ -906,7 +906,7 @@ c_ruleset = {
1, # Low risk level, because this is often used correctly when FIXING security 1, # Low risk level, because this is often used correctly when FIXING security
# problems, and raising it to a higher risk levle would cause many false # problems, and raising it to a higher risk levle would cause many false
# positives. # positives.
"Easily used incorrectly; doesn't always \\0-terminate or " + "Easily used incorrectly; doesn't always \\0-terminate or "
"check for invalid pointers [MS-banned] (CWE-120)", "check for invalid pointers [MS-banned] (CWE-120)",
"", "",
"buffer", "", {}), "buffer", "", {}),
@ -933,9 +933,9 @@ c_ruleset = {
"buffer", "", {}), "buffer", "", {}),
"char|TCHAR|wchar_t": # This isn't really a function call, but it works. "char|TCHAR|wchar_t": # This isn't really a function call, but it works.
(c_static_array, 2, (c_static_array, 2,
"Statically-sized arrays can be improperly restricted, " + "Statically-sized arrays can be improperly restricted, "
"leading to potential overflows or other issues (CWE-119!/CWE-120)", "leading to potential overflows or other issues (CWE-119!/CWE-120)",
"Perform bounds checking, use functions that limit length, " + "Perform bounds checking, use functions that limit length, "
"or ensure that the size is larger than the maximum possible length", "or ensure that the size is larger than the maximum possible length",
"buffer", "", {'extract_lookahead': 1}), "buffer", "", {'extract_lookahead': 1}),
@ -965,21 +965,21 @@ c_ruleset = {
# The "syslog" hook will raise "format" issues. # The "syslog" hook will raise "format" issues.
"syslog": "syslog":
(c_printf, 4, (c_printf, 4,
"If syslog's format strings can be influenced by an attacker, " + "If syslog's format strings can be influenced by an attacker, "
"they can be exploited (CWE-134)", "they can be exploited (CWE-134)",
"Use a constant format string for syslog", "Use a constant format string for syslog",
"format", "", {'format_position': 2}), "format", "", {'format_position': 2}),
"snprintf|vsnprintf|_snprintf|_sntprintf|_vsntprintf": "snprintf|vsnprintf|_snprintf|_sntprintf|_vsntprintf":
(c_printf, 4, (c_printf, 4,
"If format strings can be influenced by an attacker, they can be " + "If format strings can be influenced by an attacker, they can be "
"exploited, and note that sprintf variations do not always \\0-terminate (CWE-134)", "exploited, and note that sprintf variations do not always \\0-terminate (CWE-134)",
"Use a constant for the format specification", "Use a constant for the format specification",
"format", "", {'format_position': 3}), "format", "", {'format_position': 3}),
"scanf|vscanf|wscanf|_tscanf|vwscanf": "scanf|vscanf|wscanf|_tscanf|vwscanf":
(c_scanf, 4, (c_scanf, 4,
"The scanf() family's %s operation, without a limit specification, " + "The scanf() family's %s operation, without a limit specification, "
"permits buffer overflows (CWE-120, CWE-20)", "permits buffer overflows (CWE-120, CWE-20)",
"Specify a limit to %s, or use a different input function", "Specify a limit to %s, or use a different input function",
"buffer", "", {'input': 1}), "buffer", "", {'input': 1}),
@ -996,8 +996,8 @@ c_ruleset = {
# Often this isn't really a risk, and even when it is, at worst it # Often this isn't really a risk, and even when it is, at worst it
# often causes a program crash (and nothing worse). # often causes a program crash (and nothing worse).
1, 1,
"Does not handle strings that are not \\0-terminated; " + "Does not handle strings that are not \\0-terminated; "
"if given one it may perform an over-read (it could cause a crash " + "if given one it may perform an over-read (it could cause a crash "
"if unprotected) (CWE-126)", "if unprotected) (CWE-126)",
"", "",
"buffer", "", {}), "buffer", "", {}),
@ -1023,10 +1023,10 @@ c_ruleset = {
"realpath": "realpath":
(normal, 3, (normal, 3,
"This function does not protect against buffer overflows, " + "This function does not protect against buffer overflows, "
"and some implementations can overflow internally (CWE-120/CWE-785!)", "and some implementations can overflow internally (CWE-120/CWE-785!)",
"Ensure that the destination buffer is at least of size MAXPATHLEN, and" + "Ensure that the destination buffer is at least of size MAXPATHLEN, and"
"to protect against implementation problems, the input argument " + "to protect against implementation problems, the input argument "
"should also be checked to ensure it is no larger than MAXPATHLEN", "should also be checked to ensure it is no larger than MAXPATHLEN",
"buffer", "dangers-c", {}), "buffer", "dangers-c", {}),
@ -1052,43 +1052,43 @@ c_ruleset = {
"access": # ???: TODO: analyze TOCTOU more carefully. "access": # ???: TODO: analyze TOCTOU more carefully.
(normal, 4, (normal, 4,
"This usually indicates a security flaw. If an " + "This usually indicates a security flaw. If an "
"attacker can change anything along the path between the " + "attacker can change anything along the path between the "
"call to access() and the file's actual use (e.g., by moving " + "call to access() and the file's actual use (e.g., by moving "
"files), the attacker can exploit the race condition (CWE-362/CWE-367!)", "files), the attacker can exploit the race condition (CWE-362/CWE-367!)",
"Set up the correct permissions (e.g., using setuid()) and " + "Set up the correct permissions (e.g., using setuid()) and "
"try to open the file directly", "try to open the file directly",
"race", "race",
"avoid-race#atomic-filesystem", {}), "avoid-race#atomic-filesystem", {}),
"chown": "chown":
(normal, 5, (normal, 5,
"This accepts filename arguments; if an attacker " + "This accepts filename arguments; if an attacker "
"can move those files, a race condition results. (CWE-362)", "can move those files, a race condition results. (CWE-362)",
"Use fchown( ) instead", "Use fchown( ) instead",
"race", "", {}), "race", "", {}),
"chgrp": "chgrp":
(normal, 5, (normal, 5,
"This accepts filename arguments; if an attacker " + "This accepts filename arguments; if an attacker "
"can move those files, a race condition results. (CWE-362)", "can move those files, a race condition results. (CWE-362)",
"Use fchgrp( ) instead", "Use fchgrp( ) instead",
"race", "", {}), "race", "", {}),
"chmod": "chmod":
(normal, 5, (normal, 5,
"This accepts filename arguments; if an attacker " + "This accepts filename arguments; if an attacker "
"can move those files, a race condition results. (CWE-362)", "can move those files, a race condition results. (CWE-362)",
"Use fchmod( ) instead", "Use fchmod( ) instead",
"race", "", {}), "race", "", {}),
"vfork": "vfork":
(normal, 2, (normal, 2,
"On some old systems, vfork() permits race conditions, and it's " + "On some old systems, vfork() permits race conditions, and it's "
"very difficult to use correctly (CWE-362)", "very difficult to use correctly (CWE-362)",
"Use fork() instead", "Use fork() instead",
"race", "", {}), "race", "", {}),
"readlink": "readlink":
(normal, 5, (normal, 5,
"This accepts filename arguments; if an attacker " + "This accepts filename arguments; if an attacker "
"can move those files or change the link content, " + "can move those files or change the link content, "
"a race condition results. " + "a race condition results. "
"Also, it does not terminate with ASCII NUL. (CWE-362, CWE-20)", "Also, it does not terminate with ASCII NUL. (CWE-362, CWE-20)",
# This is often just a bad idea, and it's hard to suggest a # This is often just a bad idea, and it's hard to suggest a
# simple alternative: # simple alternative:
@ -1134,7 +1134,7 @@ c_ruleset = {
# Windows. TODO: Detect correct usage approaches and ignore it. # Windows. TODO: Detect correct usage approaches and ignore it.
"GetTempFileName": "GetTempFileName":
(normal, 3, (normal, 3,
"Temporary file race condition in certain cases " + "Temporary file race condition in certain cases "
"(e.g., if run as SYSTEM in many versions of Windows) (CWE-377)", "(e.g., if run as SYSTEM in many versions of Windows) (CWE-377)",
"", "",
"tmpfile", "avoid-race", {}), "tmpfile", "avoid-race", {}),
@ -1143,7 +1143,7 @@ c_ruleset = {
"execl|execlp|execle|execv|execvp|system|popen|WinExec|ShellExecute": "execl|execlp|execle|execv|execvp|system|popen|WinExec|ShellExecute":
(normal, 4, (normal, 4,
"This causes a new program to execute and is difficult to use safely (CWE-78)", "This causes a new program to execute and is difficult to use safely (CWE-78)",
"try using a library call that implements the same functionality " + "try using a library call that implements the same functionality "
"if available", "if available",
"shell", "", {}), "shell", "", {}),
@ -1160,16 +1160,16 @@ c_ruleset = {
"CreateProcess": "CreateProcess":
(c_hit_if_null, 3, (c_hit_if_null, 3,
"This causes a new process to execute and is difficult to use safely (CWE-78)", "This causes a new process to execute and is difficult to use safely (CWE-78)",
"Specify the application path in the first argument, NOT as part of the second, " + "Specify the application path in the first argument, NOT as part of the second, "
"or embedded spaces could allow an attacker to force a different program to run", "or embedded spaces could allow an attacker to force a different program to run",
"shell", "", {'check_for_null': 1}), "shell", "", {'check_for_null': 1}),
"atoi|atol|_wtoi|_wtoi64": "atoi|atol|_wtoi|_wtoi64":
(normal, 2, (normal, 2,
"Unless checked, the resulting number can exceed the expected range " + "Unless checked, the resulting number can exceed the expected range "
"(CWE-190)", "(CWE-190)",
"If source untrusted, check both minimum and maximum, even if the" + "If source untrusted, check both minimum and maximum, even if the"
" input had no minus sign (large numbers can roll over into negative" + " input had no minus sign (large numbers can roll over into negative"
" number; consider saving to an unsigned value if that is intended)", " number; consider saving to an unsigned value if that is intended)",
"integer", "dangers-c", {}), "integer", "dangers-c", {}),
@ -1182,11 +1182,11 @@ c_ruleset = {
"crypt|crypt_r": "crypt|crypt_r":
(normal, 4, (normal, 4,
"The crypt functions use a poor one-way hashing algorithm; " + "The crypt functions use a poor one-way hashing algorithm; "
"since they only accept passwords of 8 characters or fewer " + "since they only accept passwords of 8 characters or fewer "
"and only a two-byte salt, they are excessively vulnerable to " + "and only a two-byte salt, they are excessively vulnerable to "
"dictionary attacks given today's faster computing equipment (CWE-327)", "dictionary attacks given today's faster computing equipment (CWE-327)",
"Use a different algorithm, such as SHA-256, with a larger, " + "Use a different algorithm, such as SHA-256, with a larger, "
"non-repeating salt", "non-repeating salt",
"crypto", "", {}), "crypto", "", {}),
@ -1194,7 +1194,7 @@ c_ruleset = {
"EVP_des_ecb|EVP_des_cbc|EVP_des_cfb|EVP_des_ofb|EVP_desx_cbc": "EVP_des_ecb|EVP_des_cbc|EVP_des_cfb|EVP_des_ofb|EVP_desx_cbc":
(normal, 4, (normal, 4,
"DES only supports a 56-bit keysize, which is too small given today's computers (CWE-327)", "DES only supports a 56-bit keysize, which is too small given today's computers (CWE-327)",
"Use a different patent-free encryption algorithm with a larger keysize, " + "Use a different patent-free encryption algorithm with a larger keysize, "
"such as 3DES or AES", "such as 3DES or AES",
"crypto", "", {}), "crypto", "", {}),
@ -1202,38 +1202,37 @@ c_ruleset = {
"EVP_rc4_40|EVP_rc2_40_cbc|EVP_rc2_64_cbc": "EVP_rc4_40|EVP_rc2_40_cbc|EVP_rc2_64_cbc":
(normal, 4, (normal, 4,
"These keysizes are too small given today's computers (CWE-327)", "These keysizes are too small given today's computers (CWE-327)",
"Use a different patent-free encryption algorithm with a larger keysize, " + "Use a different patent-free encryption algorithm with a larger keysize, "
"such as 3DES or AES", "such as 3DES or AES",
"crypto", "", {}), "crypto", "", {}),
"chroot": "chroot":
(normal, 3, (normal, 3,
"chroot can be very helpful, but is hard to use correctly (CWE-250, CWE-22)", "chroot can be very helpful, but is hard to use correctly (CWE-250, CWE-22)",
"Make sure the program immediately chdir(\"/\")," + "Make sure the program immediately chdir(\"/\"), closes file descriptors,"
" closes file descriptors," + " and drops root privileges, and that all necessary files"
" and drops root privileges, and that all necessary files" +
" (and no more!) are in the new root", " (and no more!) are in the new root",
"misc", "", {}), "misc", "", {}),
"getenv|curl_getenv": "getenv|curl_getenv":
(normal, 3, "Environment variables are untrustable input if they can be" + (normal, 3, "Environment variables are untrustable input if they can be"
" set by an attacker. They can have any content and" + " set by an attacker. They can have any content and"
" length, and the same variable can be set more than once (CWE-807, CWE-20)", " length, and the same variable can be set more than once (CWE-807, CWE-20)",
"Check environment variables carefully before using them", "Check environment variables carefully before using them",
"buffer", "", {'input': 1}), "buffer", "", {'input': 1}),
"g_get_home_dir": "g_get_home_dir":
(normal, 3, "This function is synonymous with 'getenv(\"HOME\")';" + (normal, 3, "This function is synonymous with 'getenv(\"HOME\")';"
"it returns untrustable input if the environment can be" + "it returns untrustable input if the environment can be"
"set by an attacker. It can have any content and length, " + "set by an attacker. It can have any content and length, "
"and the same variable can be set more than once (CWE-807, CWE-20)", "and the same variable can be set more than once (CWE-807, CWE-20)",
"Check environment variables carefully before using them", "Check environment variables carefully before using them",
"buffer", "", {'input': 1}), "buffer", "", {'input': 1}),
"g_get_tmp_dir": "g_get_tmp_dir":
(normal, 3, "This function is synonymous with 'getenv(\"TMP\")';" + (normal, 3, "This function is synonymous with 'getenv(\"TMP\")';"
"it returns untrustable input if the environment can be" + "it returns untrustable input if the environment can be"
"set by an attacker. It can have any content and length, " + "set by an attacker. It can have any content and length, "
"and the same variable can be set more than once (CWE-807, CWE-20)", "and the same variable can be set more than once (CWE-807, CWE-20)",
"Check environment variables carefully before using them", "Check environment variables carefully before using them",
"buffer", "", {'input': 1}), "buffer", "", {'input': 1}),
@ -1242,8 +1241,8 @@ c_ruleset = {
# These are Windows-unique: # These are Windows-unique:
# TODO: Should have lower risk if the program checks return value. # TODO: Should have lower risk if the program checks return value.
"RpcImpersonateClient|ImpersonateLoggedOnUser|CoImpersonateClient|" + "RpcImpersonateClient|ImpersonateLoggedOnUser|CoImpersonateClient|"
"ImpersonateNamedPipeClient|ImpersonateDdeClientWindow|ImpersonateSecurityContext|" + "ImpersonateNamedPipeClient|ImpersonateDdeClientWindow|ImpersonateSecurityContext|"
"SetThreadToken": "SetThreadToken":
(normal, 4, "If this call fails, the program could fail to drop heightened privileges (CWE-250)", (normal, 4, "If this call fails, the program could fail to drop heightened privileges (CWE-250)",
"Make sure the return value is checked, and do not continue if a failure is reported", "Make sure the return value is checked, and do not continue if a failure is reported",
@ -1266,7 +1265,7 @@ c_ruleset = {
"SetSecurityDescriptorDacl": "SetSecurityDescriptorDacl":
(c_hit_if_null, 5, (c_hit_if_null, 5,
"Never create NULL ACLs; an attacker can set it to Everyone (Deny All Access), " + "Never create NULL ACLs; an attacker can set it to Everyone (Deny All Access), "
"which would even forbid administrator access (CWE-732)", "which would even forbid administrator access (CWE-732)",
"", "",
"misc", "", {'check_for_null': 3}), "misc", "", {'check_for_null': 3}),
@ -1672,7 +1671,7 @@ def display_header():
if output_format: if output_format:
print( print(
'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" ' '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
+ '"http://www.w3.org/TR/html4/loose.dtd">') '"http://www.w3.org/TR/html4/loose.dtd">')
print("<html>") print("<html>")
print("<head>") print("<head>")
print('<meta http-equiv="Content-type" content="text/html; charset=utf8">') print('<meta http-equiv="Content-type" content="text/html; charset=utf8">')