From 07275288768243e9b426ffb2c029c87e8e7c3ca0 Mon Sep 17 00:00:00 2001
From: Andrew C Aitchison <45234463+andrew-aitchison@users.noreply.github.com>
Date: Thu, 8 Jun 2023 13:46:09 +0100
Subject: [PATCH] The threadsafety.py addon now flags MT-Unsafe symbols and
functions. (#5086)
---
.github/workflows/CI-unixish.yml | 4 +-
addons/test/threadsafety/MT-Unsafe.cpp | 67 +++
addons/test/threadsafety/local_static.cpp | 3 +-
.../test/threadsafety/local_static_const.cpp | 1 +
addons/threadsafety.py | 391 +++++++++++++++++-
tools/MT-Unsafe.py | 200 +++++++++
6 files changed, 643 insertions(+), 23 deletions(-)
create mode 100644 addons/test/threadsafety/MT-Unsafe.cpp
create mode 100755 tools/MT-Unsafe.py
diff --git a/.github/workflows/CI-unixish.yml b/.github/workflows/CI-unixish.yml
index da3e174e9..0c941e057 100644
--- a/.github/workflows/CI-unixish.yml
+++ b/.github/workflows/CI-unixish.yml
@@ -378,8 +378,8 @@ jobs:
# TODO: move to scriptcheck.yml so these are tested with all Python versions?
- name: Test addons
run: |
- ./cppcheck --addon=threadsafety addons/test/threadsafety
- ./cppcheck --addon=threadsafety --std=c++03 addons/test/threadsafety
+ ./cppcheck --error-exitcode=1 --inline-suppr --addon=threadsafety addons/test/threadsafety
+ ./cppcheck --error-exitcode=1 --inline-suppr --addon=threadsafety --std=c++03 addons/test/threadsafety
./cppcheck --addon=misra --enable=style --inline-suppr --enable=information --error-exitcode=1 addons/test/misra/misra-ctu-*-test.c
pushd addons/test
# We'll force C89 standard to enable an additional verification for
diff --git a/addons/test/threadsafety/MT-Unsafe.cpp b/addons/test/threadsafety/MT-Unsafe.cpp
new file mode 100644
index 000000000..5b23cefe8
--- /dev/null
+++ b/addons/test/threadsafety/MT-Unsafe.cpp
@@ -0,0 +1,67 @@
+/*
+ * Cppcheck - A tool for static C/C++ code analysis
+ * Copyright (C) 2023 Cppcheck team.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+/*
+ * This does not match the standard cppchek test code,
+ * because I haven't figured that out yet.
+ * This code does compile and run, and does demonstrate
+ * the issues that the threadsafety.py addon is supposed to find.
+ * It does not use threads !
+ */
+
+#include
+#include
+#include
+
+void threadsafety_static()
+{
+ // cppcheck-suppress threadsafety-threadsafety
+ static unsigned int nCount = 0;
+
+ nCount++;
+ printf("%d\n", nCount);
+}
+
+void threadsafety_call()
+{
+ time_t now = time(nullptr);
+ // cppcheck-suppress threadsafety-unsafe-call
+ printf("%s\n", ctime(&now));
+}
+
+// cppcheck --addon=threadsafety
+// should *not* find any problems with this function.
+void threadsafety_safecall()
+{
+ char haystack[] = "alphabet";
+ char needle[] = "Alph";
+ char* found = strcasestr(haystack, needle);
+ printf("%s %sin %s\n", needle, found ? "" : "not ", haystack);
+}
+
+int main() {
+ threadsafety_static();
+
+ threadsafety_call();
+
+ threadsafety_safecall();
+
+ threadsafety_static();
+
+ return 0;
+}
diff --git a/addons/test/threadsafety/local_static.cpp b/addons/test/threadsafety/local_static.cpp
index 433cbaa77..b41a0571b 100644
--- a/addons/test/threadsafety/local_static.cpp
+++ b/addons/test/threadsafety/local_static.cpp
@@ -2,5 +2,6 @@ struct Dummy {
int x;
};
void func() {
- static Dummy dummy;
+ // cppcheck-suppress threadsafety-threadsafety
+ static Dummy dummy;
}
diff --git a/addons/test/threadsafety/local_static_const.cpp b/addons/test/threadsafety/local_static_const.cpp
index 12ab715af..eaed48be9 100644
--- a/addons/test/threadsafety/local_static_const.cpp
+++ b/addons/test/threadsafety/local_static_const.cpp
@@ -2,5 +2,6 @@ struct Dummy {
int x;
};
void func() {
+ // cppcheck-suppress threadsafety-threadsafety-const
static const Dummy dummy;
}
diff --git a/addons/threadsafety.py b/addons/threadsafety.py
index 81538a274..07f737e62 100755
--- a/addons/threadsafety.py
+++ b/addons/threadsafety.py
@@ -1,39 +1,390 @@
#!/usr/bin/env python3
-#
-# This script analyses Cppcheck dump files to locate threadsafety issues
-# - warn about static local objects
-#
+"""
+cppcheck addon for threadsafety detection.
-import cppcheckdata
+ This script analyses Cppcheck dump files to locate threadsafety issues.
+ It warns about
+ - static local objects
+ - MT-Unsafe symbols listed in the "Attributes" sections of man pages.
+
+"""
+
+from __future__ import print_function
+
+import os
+import re
import sys
-def reportError(token, severity, msg, id):
- cppcheckdata.reportError(token, severity, msg, 'threadsafety', id)
+import cppcheckdata
+
+# --------------------------------
+# List of MT-Unsafe identifiers
+# --------------------------------
+
+# This is Work In Progress.
+# Eventually it should contain all identifiers (types
+# and functions) which are MT-Unsafe.
+
+# The script tools/MT-Unsafe.py can help to re-generate this list.
+# It reads a man-page tree and report identifiers marked as "MT-Unsafe"
+# (see man 7 attributes for what this means), eg
+# MT-Unsafe.py /usr/share/man/man3
+
+id_MTunsafe_full = {
+ # MT-Unsafe types by definition
+ # 'pthread_t',
+ # Types marked MT-Unsafe
+ 'const:env',
+ 'const:hostid',
+ 'const:mallopt',
+ 'const:sigintr',
+ 'race:LogMask',
+ 'race:asctime',
+ 'race:crypt',
+ 'race:crypt_gensalt',
+ 'race:cuserid/!string',
+ 'race:dirstream',
+ 'race:drand48',
+ 'race:ecvt',
+ 'race:exit',
+ 'race:fcvt',
+ 'race:fgetgrent',
+ 'race:fgetpwent',
+ 'race:fgetspent',
+ 'race:fsent',
+ 'race:getdate',
+ 'race:getlogin',
+ 'race:getopt',
+ 'race:getspent',
+ 'race:getspnam',
+ 'race:grent',
+ 'race:grgid',
+ 'race:grnam',
+ 'race:hostbyaddr',
+ 'race:hostbyname',
+ 'race:hostbyname2',
+ 'race:hostent',
+ 'race:hsearch',
+ 'race:l64a',
+ 'race:localeconv',
+ 'race:mbrlen/!ps',
+ 'race:mbrtowc/!ps',
+ 'race:mbsnrtowcs/!ps',
+ 'race:mbsrtowcs/!ps',
+ 'race:mcheck',
+ 'race:mntentbuf',
+ 'race:netbyaddr',
+ 'race:netbyname',
+ 'race:netent',
+ 'race:netgrent',
+ 'race:protobyname',
+ 'race:protobynumber',
+ 'race:protoent',
+ 'race:ptsname',
+ 'race:pwent',
+ 'race:pwnam',
+ 'race:pwuid',
+ 'race:qecvt',
+ 'race:qfcvt',
+ 'race:servbyname',
+ 'race:servbyport',
+ 'race:servent',
+ 'race:sgetspent',
+ 'race:signgam',
+ 'race:stdin',
+ 'race:stdout',
+ 'race:streams',
+ 'race:strerror',
+ 'race:strsignal',
+ 'race:strtok',
+ 'race:tmbuf',
+ 'race:tmpnam/!s',
+ 'race:ttyent',
+ 'race:ttyname',
+ 'race:utent',
+ 'race:wcrtomb/!ps',
+ 'race:wcsnrtombs/!ps',
+ 'race:wcsrtombs/!ps',
+ 'sig:ALRM',
+ 'sig:SIGCHLD/linux',
+ # APIs marked MT-Unsafe
+ 'asctime',
+ 'clearenv',
+ 'ctime',
+ 'cuserid',
+ 'drand48',
+ 'ecvt',
+ 'encrypt',
+ 'endfsent',
+ 'endgrent',
+ 'endhostent',
+ 'endnetent',
+ 'endnetgrent',
+ 'endprotoent',
+ 'endpwent',
+ 'endservent',
+ 'endspent',
+ 'endttyent',
+ 'endusershell',
+ 'endutent',
+ 'erand48',
+ 'error_at_line',
+ 'ether_aton',
+ 'ether_ntoa',
+ 'exit',
+ 'fcloseall',
+ 'fcvt',
+ 'fgetgrent',
+ 'fgetpwent',
+ 'fgetspent',
+ 'fts_children',
+ 'fts_read',
+ 'gamma',
+ 'gammaf',
+ 'gammal',
+ 'getaliasbyname',
+ 'getaliasent',
+ 'getchar_unlocked',
+ 'getdate',
+ 'getfsent',
+ 'getfsfile',
+ 'getfsspec',
+ 'getgrent',
+ 'getgrent_r',
+ 'getgrgid',
+ 'getgrnam',
+ 'gethostbyaddr',
+ 'gethostbyname',
+ 'gethostbyname2',
+ 'gethostent',
+ 'gethostent_r',
+ 'getlogin',
+ 'getlogin_r',
+ 'getmntent',
+ 'getnetbyaddr',
+ 'getnetbyname',
+ 'getnetent',
+ 'getnetgrent',
+ 'getnetgrent_r',
+ 'getopt',
+ 'getopt_long',
+ 'getopt_long_only',
+ 'getpass',
+ 'getprotobyname',
+ 'getprotobynumber',
+ 'getprotoent',
+ 'getpwent',
+ 'getpwent_r',
+ 'getpwnam',
+ 'getpwuid',
+ 'getrpcbyname',
+ 'getrpcbynumber',
+ 'getrpcent',
+ 'getservbyname',
+ 'getservbyport',
+ 'getservent',
+ 'getspent',
+ 'getspent_r',
+ 'getspnam',
+ 'getttyent',
+ 'getttynam',
+ 'getusershell',
+ 'getutent',
+ 'getutid',
+ 'getutline',
+ 'getwchar_unlocked',
+ 'glob',
+ 'gmtime',
+ 'hcreate',
+ 'hdestroy',
+ 'hsearch',
+ 'innetgr',
+ 'jrand48',
+ 'l64a',
+ 'lcong48',
+ 'localeconv',
+ 'localtime',
+ 'login',
+ 'login_tty',
+ 'logout',
+ 'logwtmp',
+ 'lrand48',
+ 'mallinfo',
+ 'mallinfo2',
+ 'mblen',
+ 'mbrlen',
+ 'mbrtowc',
+ 'mbsnrtowcs',
+ 'mbsrtowcs',
+ 'mbtowc',
+ 'mcheck',
+ 'mcheck_check_all',
+ 'mcheck_pedantic',
+ 'mprobe',
+ 'mrand48',
+ 'mtrace',
+ 'muntrace',
+ 'nrand48',
+ 'profil',
+ 'ptsname',
+ 'putchar_unlocked',
+ 'putenv',
+ 'pututline',
+ 'putwchar_unlocked',
+ 'pvalloc',
+ 'qecvt',
+ 'qfcvt',
+ 'rcmd',
+ 'rcmd_af',
+ 're_comp',
+ 're_exec',
+ 'readdir',
+ 'rexec',
+ 'rexec_af',
+ 'seed48',
+ 'setenv',
+ 'setfsent',
+ 'setgrent',
+ 'sethostent',
+ 'sethostid',
+ 'setkey',
+ 'setlogmask',
+ 'setnetent',
+ 'setnetgrent',
+ 'setprotoent',
+ 'setpwent',
+ 'setservent',
+ 'setspent',
+ 'setttyent',
+ 'setusershell',
+ 'setutent',
+ 'sgetspent',
+ 'siginterrupt',
+ 'sleep',
+ 'srand48',
+ 'strerror',
+ 'strsignal',
+ 'strtok',
+ 'tmpnam',
+ 'ttyname',
+ 'ttyslot',
+ 'unsetenv',
+ 'updwtmp',
+ 'utmpname',
+ 'valloc',
+ 'wcrtomb',
+ 'wcsnrtombs',
+ 'wcsrtombs',
+ 'wctomb',
+ 'wordexp'
+}
+
+# From man 7 attributes
+# the full token could be feature:function/condition - we just want function.
+id_MTunsafe = [re.sub('^.*:', '', re.sub('/.*$', '', x))
+ for x in id_MTunsafe_full
+ ]
-def checkstatic(data):
+def reportError(token, severity, msg, errid): # noqa: D103
+ cppcheckdata.reportError(token, severity, msg, 'threadsafety', errid)
+
+
+def checkstatic(data): # noqa: D103
for var in data.variables:
if var.isStatic and var.isLocal:
- type = None
+ vartype = None
if var.isClass:
- type = 'object'
+ vartype = 'object'
else:
- type = 'variable'
+ vartype = 'variable'
if var.isConst:
if data.standards.cpp == 'c++03':
- reportError(var.typeStartToken, 'warning', 'Local constant static ' + type + ' \'' + var.nameToken.str + '\', dangerous if it is initialized in parallel threads', 'threadsafety-const')
+ reportError(
+ var.typeStartToken,
+ 'warning',
+ 'Local constant static '
+ + vartype + "'" + var.nameToken.str
+ + "', dangerous if it is initialized"
+ + ' in parallel threads',
+ 'threadsafety-const')
else:
- reportError(var.typeStartToken, 'warning', 'Local static ' + type + ': ' + var.nameToken.str, 'threadsafety')
+ reportError(var.typeStartToken, 'warning',
+ 'Local static ' + vartype + ': '
+ + var.nameToken.str,
+ 'threadsafety')
-for arg in sys.argv[1:]:
- if arg.startswith('-'):
- continue
- print('Checking %s...' % arg)
- data = cppcheckdata.CppcheckData(arg)
+def check_MTunsafe(data, srcfile, quiet=False):
+ """
+ Look for functions marked MT-unsafe in their man pages.
+ The MT-unsafe functions are listed in id_MTunsafe (and id_MTunsafe_full).
+ That section of code can be regenerated by the external script MT-Unsafe.py
+ """
+ # Assume that the code is Multi-thread safe until proven otherwise
+ MTsafe = True
+
+ # go through each configuration
for cfg in data.iterconfigurations():
- print('Checking %s, config %s...' % (arg, cfg.name))
- checkstatic(cfg)
+ if not quiet:
+ print('Checking %s, config %s...' % (srcfile, cfg.name))
+
+ # go through all tokens
+ for token in cfg.tokenlist:
+ if token.str in id_MTunsafe:
+ cppcheckdata.reportError(token, 'warning',
+ token.str + ' is MT-unsafe',
+ 'threadsafety',
+ 'unsafe-call')
+ MTsafe = False
+
+ return MTsafe
+
+
+def get_args_parser(): # noqa: D103
+ parser = cppcheckdata.ArgumentParser()
+ return parser
+
+
+if __name__ == '__main__':
+ parser = get_args_parser()
+ args = parser.parse_args()
+
+ exit_code = 0
+ quiet = not any((args.quiet, args.cli))
+
+ if not args.dumpfile:
+ if not args.quiet:
+ print('no input files.')
+ sys.exit(0)
+
+ for dumpfile in args.dumpfile:
+ # for arg in sys.argv[1:]:
+ # if arg.startswith('-'):
+ # continue
+
+ if not args.quiet:
+ print('Checking ' + dumpfile + '...')
+
+ # load XML from .dump file
+ data = cppcheckdata.CppcheckData(dumpfile)
+ # data = cppcheckdata.CppcheckData(args)
+
+ # Convert dump file path to source file in format generated by
+ # cppcheck. For example after the following call:
+ # cppcheck ./src/my-src.c --dump
+ # We got 'src/my-src.c' value for 'file' field in cppcheckdata.
+ srcfile = dumpfile.rstrip('.dump')
+ srcfile = os.path.expanduser(srcfile)
+ srcfile = os.path.normpath(srcfile)
+
+ check_MTunsafe(data, srcfile, quiet)
+
+ print('Checking %s...' % args)
+
+ for cfg in data.iterconfigurations():
+ print('Checking %s, config %s...' % (args, cfg.name))
+ checkstatic(cfg)
sys.exit(cppcheckdata.EXIT_CODE)
diff --git a/tools/MT-Unsafe.py b/tools/MT-Unsafe.py
new file mode 100755
index 000000000..a30395ff0
--- /dev/null
+++ b/tools/MT-Unsafe.py
@@ -0,0 +1,200 @@
+#!/usr/bin/python -u
+
+"""
+Generates a list of MT unsafe symbols for a cppcheck addon.
+
+The cppcheck addon threadsafety.py uses a list
+ id_MTunsafe_full
+of symbols - mostly functions - which are not multi-thread safe.
+
+This script generates such a list by parsing (the troff source of)
+a man page, or a directory tree of man pages,
+looking for the attributes described in 'man 7 attributes'.
+
+Typical example use:
+ MT-Unsafe.py /usr/share/man/man3
+The output must then be merged into the threadsafety.py addon.
+"""
+
+import gzip
+import os
+import re
+import sys
+
+debug = 0
+verbose = 0
+
+unsafe_apis = set()
+unsafe_types = set()
+
+
+def dprint(level, fmt, varlist=()):
+ """Print messages for someone debugging this script. This wraps print()."""
+ if debug < level:
+ return
+ if varlist:
+ print(fmt % varlist, file=sys.stderr)
+ else:
+ print(fmt, file=sys.stderr)
+
+
+def vprint(level, fmt, varlist=()):
+ """Print messages for someone running this script. This wraps print()."""
+ if verbose < level:
+ return
+ if varlist:
+ print(fmt % varlist, file=sys.stderr)
+ else:
+ print(fmt, file=sys.stderr)
+
+
+def man_search(manpage):
+ """Search one manpage for tokens in the attributes table."""
+ vprint(1, '-- %s --' % (manpage))
+
+ try:
+ if manpage.endswith('.gz'):
+ MANPAGE = gzip.open(manpage, 'r')
+ else:
+ MANPAGE = open(manpage, 'r')
+ except OSError as filename:
+ print('cannot open %s' % filename, file=sys.stderr)
+ return None, None
+
+ vprint(1, '%s opened' % (manpage))
+
+ TSmatch = None
+ for lineread in MANPAGE:
+ vprint(4, 'type %s', type(lineread))
+ lineread = str(lineread)
+ vprint(3, '--%s' % lineread)
+ # TSmatch = lineread.startswith('.TS')
+ TSmatch = re.search('\\.TS', lineread)
+ if TSmatch:
+ dprint(1, '%s:\treached .TS' % (manpage))
+ break
+
+ # dprint(2, '%s', lineread)
+
+ if not TSmatch:
+ dprint(1, '.TS not found in %s' % manpage)
+ return # None, None
+
+ vprint(1, 'Started reading the attribute table')
+
+ apis = set()
+ for lineread in MANPAGE:
+ lineread = str(lineread)
+ dprint(2, '%s' % (lineread))
+ if 'MT-Safe' in lineread:
+ vprint(1, 'clearing MT-Safe %s', lineread)
+ apis.clear()
+
+ res = re.search(r'\.BR\s+(\w+)\s', lineread)
+ # vprint(1, '%s for %s' % (res, lineread))
+ if res:
+ apis.add(res.group(1))
+ dprint(1, 'found api %s in %s' % (res.group(1), lineread))
+ next
+
+ if 'MT-Unsafe' in lineread:
+ resUnsafe = re.search("MT-Unsafe\\s+(.*)(\\n\'|$)", lineread)
+
+ if resUnsafe:
+ values = resUnsafe.group(1)
+ dprint(1, 'a %s' % values)
+ values = re.sub(r'\\n\'$', '', values)
+ #
+ values = values.split(' ')
+ dprint(1, 'values %s' % list(values))
+ for val in values:
+ unsafe_types.add(val)
+
+ # dprint(1, 'pushing ', list(apis), sep=',')
+ dprint(1, 'new apis %s' % list(apis))
+ for api in apis:
+ unsafe_apis.add(api)
+ next
+
+ # if lineread.startswith('.TE'):
+ if re.search('.TE', lineread):
+ dprint(1, '%s:\treached .TE' % (manpage))
+ break
+
+ dprint(1, 'Finished reading the attribute table')
+
+ MANPAGE.close()
+
+ return # list(unsafe_types), list(unsafe_apis)
+
+
+def do_man_page(manpage):
+ """Wrap man_search(), with logging."""
+ dprint(1, 'do_man_page(%s)' % (manpage))
+ man_search(manpage)
+ if unsafe_types:
+ dprint(1, '%d new types in %s' % (len(unsafe_types), manpage))
+ else:
+ dprint(1, 'No new types in %s' % (manpage))
+
+ if unsafe_apis:
+ dprint(1, '%d unsafe_apis in %s' % (len(unsafe_apis), manpage))
+ else:
+ dprint(1, 'No new apis in %s' % (manpage))
+
+
+def do_man_dir(directory):
+ """Recursively process a directory of man-pages."""
+ dprint(1, 'do_man_dir(%s)' % (directory))
+ if os.path.isfile(directory):
+ return do_man_page(directory)
+
+ for path, directories, files in os.walk(directory):
+ for file in files:
+ dprint(2, 'calling do_man_page(%s)' % (
+ os.path.join(path, file)))
+ do_man_page(os.path.join(path, file))
+
+
+manpages = set()
+for arg in sys.argv[1:]:
+ if arg.startswith('-'):
+ if re.match('^-+debug', arg):
+ debug = debug+1
+ dprint(1, 'debug %d' % debug)
+ next
+ else:
+ if os.access(arg, os.R_OK):
+ manpages.add(arg)
+ dprint(1, 'manpages+= %s' % (arg))
+ else:
+ dprint(0, 'skipping arg - not readable')
+
+dprint(2, 'manpages: %s' % manpages)
+
+
+for manpage in manpages:
+ do_man_dir(manpage)
+
+
+dprint(1, '-----------------------------------------\n')
+dprint(1, '%d unsafe_types' % len(unsafe_types))
+dprint(1, '%d unsafe_apis' % len(unsafe_apis))
+dprint(1, 'type: %s' % type(unsafe_apis))
+
+print('{\n # Types marked MT-Unsafe')
+# unsafe_types is not the whole of the list,
+# so the last item *is* followed by a comma:
+for u_type in sorted(unsafe_types):
+ print(" '%s'," % u_type)
+
+
+print(' # APIs marked MT-Unsafe')
+# unsafe_apis completes the list,
+# so we ought to remove the last comma.
+for u_api in sorted(unsafe_apis):
+ print(" '%s'," % u_api)
+
+print('}\n')
+
+# print(sorted(unsafe_apis), sep=',\n ', end='\n}\n')