The threadsafety.py addon now flags MT-Unsafe symbols and functions. (#5086)

This commit is contained in:
Andrew C Aitchison 2023-06-08 13:46:09 +01:00 committed by GitHub
parent 520a6644a6
commit 0727528876
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 643 additions and 23 deletions

View File

@ -378,8 +378,8 @@ jobs:
# TODO: move to scriptcheck.yml so these are tested with all Python versions?
- name: Test addons
run: |
./cppcheck --addon=threadsafety addons/test/threadsafety
./cppcheck --addon=threadsafety --std=c++03 addons/test/threadsafety
./cppcheck --error-exitcode=1 --inline-suppr --addon=threadsafety addons/test/threadsafety
./cppcheck --error-exitcode=1 --inline-suppr --addon=threadsafety --std=c++03 addons/test/threadsafety
./cppcheck --addon=misra --enable=style --inline-suppr --enable=information --error-exitcode=1 addons/test/misra/misra-ctu-*-test.c
pushd addons/test
# We'll force C89 standard to enable an additional verification for

View File

@ -0,0 +1,67 @@
/*
* Cppcheck - A tool for static C/C++ code analysis
* Copyright (C) 2023 Cppcheck team.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* This does not match the standard cppchek test code,
* because I haven't figured that out yet.
* This code does compile and run, and does demonstrate
* the issues that the threadsafety.py addon is supposed to find.
* It does not use threads !
*/
#include <stdio.h>
#include <time.h>
#include <string.h>
void threadsafety_static()
{
// cppcheck-suppress threadsafety-threadsafety
static unsigned int nCount = 0;
nCount++;
printf("%d\n", nCount);
}
void threadsafety_call()
{
time_t now = time(nullptr);
// cppcheck-suppress threadsafety-unsafe-call
printf("%s\n", ctime(&now));
}
// cppcheck --addon=threadsafety
// should *not* find any problems with this function.
void threadsafety_safecall()
{
char haystack[] = "alphabet";
char needle[] = "Alph";
char* found = strcasestr(haystack, needle);
printf("%s %sin %s\n", needle, found ? "" : "not ", haystack);
}
int main() {
threadsafety_static();
threadsafety_call();
threadsafety_safecall();
threadsafety_static();
return 0;
}

View File

@ -2,5 +2,6 @@ struct Dummy {
int x;
};
void func() {
static Dummy dummy;
// cppcheck-suppress threadsafety-threadsafety
static Dummy dummy;
}

View File

@ -2,5 +2,6 @@ struct Dummy {
int x;
};
void func() {
// cppcheck-suppress threadsafety-threadsafety-const
static const Dummy dummy;
}

View File

@ -1,39 +1,390 @@
#!/usr/bin/env python3
#
# This script analyses Cppcheck dump files to locate threadsafety issues
# - warn about static local objects
#
"""
cppcheck addon for threadsafety detection.
import cppcheckdata
This script analyses Cppcheck dump files to locate threadsafety issues.
It warns about
- static local objects
- MT-Unsafe symbols listed in the "Attributes" sections of man pages.
"""
from __future__ import print_function
import os
import re
import sys
def reportError(token, severity, msg, id):
cppcheckdata.reportError(token, severity, msg, 'threadsafety', id)
import cppcheckdata
# --------------------------------
# List of MT-Unsafe identifiers
# --------------------------------
# This is Work In Progress.
# Eventually it should contain all identifiers (types
# and functions) which are MT-Unsafe.
# The script tools/MT-Unsafe.py can help to re-generate this list.
# It reads a man-page tree and report identifiers marked as "MT-Unsafe"
# (see man 7 attributes for what this means), eg
# MT-Unsafe.py /usr/share/man/man3
id_MTunsafe_full = {
# MT-Unsafe types by definition
# 'pthread_t',
# Types marked MT-Unsafe
'const:env',
'const:hostid',
'const:mallopt',
'const:sigintr',
'race:LogMask',
'race:asctime',
'race:crypt',
'race:crypt_gensalt',
'race:cuserid/!string',
'race:dirstream',
'race:drand48',
'race:ecvt',
'race:exit',
'race:fcvt',
'race:fgetgrent',
'race:fgetpwent',
'race:fgetspent',
'race:fsent',
'race:getdate',
'race:getlogin',
'race:getopt',
'race:getspent',
'race:getspnam',
'race:grent',
'race:grgid',
'race:grnam',
'race:hostbyaddr',
'race:hostbyname',
'race:hostbyname2',
'race:hostent',
'race:hsearch',
'race:l64a',
'race:localeconv',
'race:mbrlen/!ps',
'race:mbrtowc/!ps',
'race:mbsnrtowcs/!ps',
'race:mbsrtowcs/!ps',
'race:mcheck',
'race:mntentbuf',
'race:netbyaddr',
'race:netbyname',
'race:netent',
'race:netgrent',
'race:protobyname',
'race:protobynumber',
'race:protoent',
'race:ptsname',
'race:pwent',
'race:pwnam',
'race:pwuid',
'race:qecvt',
'race:qfcvt',
'race:servbyname',
'race:servbyport',
'race:servent',
'race:sgetspent',
'race:signgam',
'race:stdin',
'race:stdout',
'race:streams',
'race:strerror',
'race:strsignal',
'race:strtok',
'race:tmbuf',
'race:tmpnam/!s',
'race:ttyent',
'race:ttyname',
'race:utent',
'race:wcrtomb/!ps',
'race:wcsnrtombs/!ps',
'race:wcsrtombs/!ps',
'sig:ALRM',
'sig:SIGCHLD/linux',
# APIs marked MT-Unsafe
'asctime',
'clearenv',
'ctime',
'cuserid',
'drand48',
'ecvt',
'encrypt',
'endfsent',
'endgrent',
'endhostent',
'endnetent',
'endnetgrent',
'endprotoent',
'endpwent',
'endservent',
'endspent',
'endttyent',
'endusershell',
'endutent',
'erand48',
'error_at_line',
'ether_aton',
'ether_ntoa',
'exit',
'fcloseall',
'fcvt',
'fgetgrent',
'fgetpwent',
'fgetspent',
'fts_children',
'fts_read',
'gamma',
'gammaf',
'gammal',
'getaliasbyname',
'getaliasent',
'getchar_unlocked',
'getdate',
'getfsent',
'getfsfile',
'getfsspec',
'getgrent',
'getgrent_r',
'getgrgid',
'getgrnam',
'gethostbyaddr',
'gethostbyname',
'gethostbyname2',
'gethostent',
'gethostent_r',
'getlogin',
'getlogin_r',
'getmntent',
'getnetbyaddr',
'getnetbyname',
'getnetent',
'getnetgrent',
'getnetgrent_r',
'getopt',
'getopt_long',
'getopt_long_only',
'getpass',
'getprotobyname',
'getprotobynumber',
'getprotoent',
'getpwent',
'getpwent_r',
'getpwnam',
'getpwuid',
'getrpcbyname',
'getrpcbynumber',
'getrpcent',
'getservbyname',
'getservbyport',
'getservent',
'getspent',
'getspent_r',
'getspnam',
'getttyent',
'getttynam',
'getusershell',
'getutent',
'getutid',
'getutline',
'getwchar_unlocked',
'glob',
'gmtime',
'hcreate',
'hdestroy',
'hsearch',
'innetgr',
'jrand48',
'l64a',
'lcong48',
'localeconv',
'localtime',
'login',
'login_tty',
'logout',
'logwtmp',
'lrand48',
'mallinfo',
'mallinfo2',
'mblen',
'mbrlen',
'mbrtowc',
'mbsnrtowcs',
'mbsrtowcs',
'mbtowc',
'mcheck',
'mcheck_check_all',
'mcheck_pedantic',
'mprobe',
'mrand48',
'mtrace',
'muntrace',
'nrand48',
'profil',
'ptsname',
'putchar_unlocked',
'putenv',
'pututline',
'putwchar_unlocked',
'pvalloc',
'qecvt',
'qfcvt',
'rcmd',
'rcmd_af',
're_comp',
're_exec',
'readdir',
'rexec',
'rexec_af',
'seed48',
'setenv',
'setfsent',
'setgrent',
'sethostent',
'sethostid',
'setkey',
'setlogmask',
'setnetent',
'setnetgrent',
'setprotoent',
'setpwent',
'setservent',
'setspent',
'setttyent',
'setusershell',
'setutent',
'sgetspent',
'siginterrupt',
'sleep',
'srand48',
'strerror',
'strsignal',
'strtok',
'tmpnam',
'ttyname',
'ttyslot',
'unsetenv',
'updwtmp',
'utmpname',
'valloc',
'wcrtomb',
'wcsnrtombs',
'wcsrtombs',
'wctomb',
'wordexp'
}
# From man 7 attributes
# the full token could be feature:function/condition - we just want function.
id_MTunsafe = [re.sub('^.*:', '', re.sub('/.*$', '', x))
for x in id_MTunsafe_full
]
def checkstatic(data):
def reportError(token, severity, msg, errid): # noqa: D103
cppcheckdata.reportError(token, severity, msg, 'threadsafety', errid)
def checkstatic(data): # noqa: D103
for var in data.variables:
if var.isStatic and var.isLocal:
type = None
vartype = None
if var.isClass:
type = 'object'
vartype = 'object'
else:
type = 'variable'
vartype = 'variable'
if var.isConst:
if data.standards.cpp == 'c++03':
reportError(var.typeStartToken, 'warning', 'Local constant static ' + type + ' \'' + var.nameToken.str + '\', dangerous if it is initialized in parallel threads', 'threadsafety-const')
reportError(
var.typeStartToken,
'warning',
'Local constant static '
+ vartype + "'" + var.nameToken.str
+ "', dangerous if it is initialized"
+ ' in parallel threads',
'threadsafety-const')
else:
reportError(var.typeStartToken, 'warning', 'Local static ' + type + ': ' + var.nameToken.str, 'threadsafety')
reportError(var.typeStartToken, 'warning',
'Local static ' + vartype + ': '
+ var.nameToken.str,
'threadsafety')
for arg in sys.argv[1:]:
if arg.startswith('-'):
continue
print('Checking %s...' % arg)
data = cppcheckdata.CppcheckData(arg)
def check_MTunsafe(data, srcfile, quiet=False):
"""
Look for functions marked MT-unsafe in their man pages.
The MT-unsafe functions are listed in id_MTunsafe (and id_MTunsafe_full).
That section of code can be regenerated by the external script MT-Unsafe.py
"""
# Assume that the code is Multi-thread safe until proven otherwise
MTsafe = True
# go through each configuration
for cfg in data.iterconfigurations():
print('Checking %s, config %s...' % (arg, cfg.name))
checkstatic(cfg)
if not quiet:
print('Checking %s, config %s...' % (srcfile, cfg.name))
# go through all tokens
for token in cfg.tokenlist:
if token.str in id_MTunsafe:
cppcheckdata.reportError(token, 'warning',
token.str + ' is MT-unsafe',
'threadsafety',
'unsafe-call')
MTsafe = False
return MTsafe
def get_args_parser(): # noqa: D103
parser = cppcheckdata.ArgumentParser()
return parser
if __name__ == '__main__':
parser = get_args_parser()
args = parser.parse_args()
exit_code = 0
quiet = not any((args.quiet, args.cli))
if not args.dumpfile:
if not args.quiet:
print('no input files.')
sys.exit(0)
for dumpfile in args.dumpfile:
# for arg in sys.argv[1:]:
# if arg.startswith('-'):
# continue
if not args.quiet:
print('Checking ' + dumpfile + '...')
# load XML from .dump file
data = cppcheckdata.CppcheckData(dumpfile)
# data = cppcheckdata.CppcheckData(args)
# Convert dump file path to source file in format generated by
# cppcheck. For example after the following call:
# cppcheck ./src/my-src.c --dump
# We got 'src/my-src.c' value for 'file' field in cppcheckdata.
srcfile = dumpfile.rstrip('.dump')
srcfile = os.path.expanduser(srcfile)
srcfile = os.path.normpath(srcfile)
check_MTunsafe(data, srcfile, quiet)
print('Checking %s...' % args)
for cfg in data.iterconfigurations():
print('Checking %s, config %s...' % (args, cfg.name))
checkstatic(cfg)
sys.exit(cppcheckdata.EXIT_CODE)

200
tools/MT-Unsafe.py Executable file
View File

@ -0,0 +1,200 @@
#!/usr/bin/python -u
"""
Generates a list of MT unsafe symbols for a cppcheck addon.
The cppcheck addon threadsafety.py uses a list
id_MTunsafe_full
of symbols - mostly functions - which are not multi-thread safe.
This script generates such a list by parsing (the troff source of)
a man page, or a directory tree of man pages,
looking for the attributes described in 'man 7 attributes'.
Typical example use:
MT-Unsafe.py /usr/share/man/man3
The output must then be merged into the threadsafety.py addon.
"""
import gzip
import os
import re
import sys
debug = 0
verbose = 0
unsafe_apis = set()
unsafe_types = set()
def dprint(level, fmt, varlist=()):
"""Print messages for someone debugging this script. This wraps print()."""
if debug < level:
return
if varlist:
print(fmt % varlist, file=sys.stderr)
else:
print(fmt, file=sys.stderr)
def vprint(level, fmt, varlist=()):
"""Print messages for someone running this script. This wraps print()."""
if verbose < level:
return
if varlist:
print(fmt % varlist, file=sys.stderr)
else:
print(fmt, file=sys.stderr)
def man_search(manpage):
"""Search one manpage for tokens in the attributes table."""
vprint(1, '-- %s --' % (manpage))
try:
if manpage.endswith('.gz'):
MANPAGE = gzip.open(manpage, 'r')
else:
MANPAGE = open(manpage, 'r')
except OSError as filename:
print('cannot open %s' % filename, file=sys.stderr)
return None, None
vprint(1, '%s opened' % (manpage))
TSmatch = None
for lineread in MANPAGE:
vprint(4, 'type %s', type(lineread))
lineread = str(lineread)
vprint(3, '--%s' % lineread)
# TSmatch = lineread.startswith('.TS')
TSmatch = re.search('\\.TS', lineread)
if TSmatch:
dprint(1, '%s:\treached .TS' % (manpage))
break
# dprint(2, '%s', lineread)
if not TSmatch:
dprint(1, '.TS not found in %s' % manpage)
return # None, None
vprint(1, 'Started reading the attribute table')
apis = set()
for lineread in MANPAGE:
lineread = str(lineread)
dprint(2, '%s' % (lineread))
if 'MT-Safe' in lineread:
vprint(1, 'clearing MT-Safe %s', lineread)
apis.clear()
res = re.search(r'\.BR\s+(\w+)\s', lineread)
# vprint(1, '%s for %s' % (res, lineread))
if res:
apis.add(res.group(1))
dprint(1, 'found api %s in %s' % (res.group(1), lineread))
next
if 'MT-Unsafe' in lineread:
resUnsafe = re.search("MT-Unsafe\\s+(.*)(\\n\'|$)", lineread)
if resUnsafe:
values = resUnsafe.group(1)
dprint(1, 'a %s' % values)
values = re.sub(r'\\n\'$', '', values)
#
values = values.split(' ')
dprint(1, 'values %s' % list(values))
for val in values:
unsafe_types.add(val)
# dprint(1, 'pushing ', list(apis), sep=',')
dprint(1, 'new apis %s' % list(apis))
for api in apis:
unsafe_apis.add(api)
next
# if lineread.startswith('.TE'):
if re.search('.TE', lineread):
dprint(1, '%s:\treached .TE' % (manpage))
break
dprint(1, 'Finished reading the attribute table')
MANPAGE.close()
return # list(unsafe_types), list(unsafe_apis)
def do_man_page(manpage):
"""Wrap man_search(), with logging."""
dprint(1, 'do_man_page(%s)' % (manpage))
man_search(manpage)
if unsafe_types:
dprint(1, '%d new types in %s' % (len(unsafe_types), manpage))
else:
dprint(1, 'No new types in %s' % (manpage))
if unsafe_apis:
dprint(1, '%d unsafe_apis in %s' % (len(unsafe_apis), manpage))
else:
dprint(1, 'No new apis in %s' % (manpage))
def do_man_dir(directory):
"""Recursively process a directory of man-pages."""
dprint(1, 'do_man_dir(%s)' % (directory))
if os.path.isfile(directory):
return do_man_page(directory)
for path, directories, files in os.walk(directory):
for file in files:
dprint(2, 'calling do_man_page(%s)' % (
os.path.join(path, file)))
do_man_page(os.path.join(path, file))
manpages = set()
for arg in sys.argv[1:]:
if arg.startswith('-'):
if re.match('^-+debug', arg):
debug = debug+1
dprint(1, 'debug %d' % debug)
next
else:
if os.access(arg, os.R_OK):
manpages.add(arg)
dprint(1, 'manpages+= %s' % (arg))
else:
dprint(0, 'skipping arg - not readable')
dprint(2, 'manpages: %s' % manpages)
for manpage in manpages:
do_man_dir(manpage)
dprint(1, '-----------------------------------------\n')
dprint(1, '%d unsafe_types' % len(unsafe_types))
dprint(1, '%d unsafe_apis' % len(unsafe_apis))
dprint(1, 'type: %s' % type(unsafe_apis))
print('{\n # Types marked MT-Unsafe')
# unsafe_types is not the whole of the list,
# so the last item *is* followed by a comma:
for u_type in sorted(unsafe_types):
print(" '%s'," % u_type)
print(' # APIs marked MT-Unsafe')
# unsafe_apis completes the list,
# so we ought to remove the last comma.
for u_api in sorted(unsafe_apis):
print(" '%s'," % u_api)
print('}\n')
# print(sorted(unsafe_apis), sep=',\n ', end='\n}\n')