#!/usr/bin/python -u """ Generates a list of MT unsafe symbols for a cppcheck addon. The cppcheck addon threadsafety.py uses a list id_MTunsafe_full of symbols - mostly functions - which are not multi-thread safe. This script generates such a list by parsing (the troff source of) a man page, or a directory tree of man pages, looking for the attributes described in 'man 7 attributes'. Typical example use: MT-Unsafe.py /usr/share/man/man3 The output must then be merged into the threadsafety.py addon. """ import gzip import os import re import sys debug = 0 verbose = 0 unsafe_apis = set() unsafe_types = set() def dprint(level, fmt, varlist=()): """Print messages for someone debugging this script. This wraps print().""" if debug < level: return if varlist: print(fmt % varlist, file=sys.stderr) else: print(fmt, file=sys.stderr) def vprint(level, fmt, varlist=()): """Print messages for someone running this script. This wraps print().""" if verbose < level: return if varlist: print(fmt % varlist, file=sys.stderr) else: print(fmt, file=sys.stderr) def man_search(manpage): """Search one manpage for tokens in the attributes table.""" vprint(1, '-- %s --' % (manpage)) try: if manpage.endswith('.gz'): MANPAGE = gzip.open(manpage, 'r') else: MANPAGE = open(manpage, 'r') except OSError as filename: print('cannot open %s' % filename, file=sys.stderr) return None, None vprint(1, '%s opened' % (manpage)) TSmatch = None for lineread in MANPAGE: vprint(4, 'type %s', type(lineread)) lineread = str(lineread) vprint(3, '--%s' % lineread) # TSmatch = lineread.startswith('.TS') TSmatch = re.search('\\.TS', lineread) if TSmatch: dprint(1, '%s:\treached .TS' % (manpage)) break # dprint(2, '%s', lineread) if not TSmatch: dprint(1, '.TS not found in %s' % manpage) return # None, None vprint(1, 'Started reading the attribute table') apis = set() for lineread in MANPAGE: lineread = str(lineread) dprint(2, '%s' % (lineread)) if 'MT-Safe' in lineread: vprint(1, 'clearing MT-Safe %s', lineread) apis.clear() res = re.search(r'\.BR\s+(\w+)\s', lineread) # vprint(1, '%s for %s' % (res, lineread)) if res: apis.add(res.group(1)) dprint(1, 'found api %s in %s' % (res.group(1), lineread)) next if 'MT-Unsafe' in lineread: resUnsafe = re.search("MT-Unsafe\\s+(.*)(\\n\'|$)", lineread) if resUnsafe: values = resUnsafe.group(1) dprint(1, 'a %s' % values) values = re.sub(r'\\n\'$', '', values) # values = values.split(' ') dprint(1, 'values %s' % list(values)) for val in values: unsafe_types.add(val) # dprint(1, 'pushing ', list(apis), sep=',') dprint(1, 'new apis %s' % list(apis)) for api in apis: unsafe_apis.add(api) next # if lineread.startswith('.TE'): if re.search('.TE', lineread): dprint(1, '%s:\treached .TE' % (manpage)) break dprint(1, 'Finished reading the attribute table') MANPAGE.close() return # list(unsafe_types), list(unsafe_apis) def do_man_page(manpage): """Wrap man_search(), with logging.""" dprint(1, 'do_man_page(%s)' % (manpage)) man_search(manpage) if unsafe_types: dprint(1, '%d new types in %s' % (len(unsafe_types), manpage)) else: dprint(1, 'No new types in %s' % (manpage)) if unsafe_apis: dprint(1, '%d unsafe_apis in %s' % (len(unsafe_apis), manpage)) else: dprint(1, 'No new apis in %s' % (manpage)) def do_man_dir(directory): """Recursively process a directory of man-pages.""" dprint(1, 'do_man_dir(%s)' % (directory)) if os.path.isfile(directory): return do_man_page(directory) for path, directories, files in os.walk(directory): for file in files: dprint(2, 'calling do_man_page(%s)' % ( os.path.join(path, file))) do_man_page(os.path.join(path, file)) manpages = set() for arg in sys.argv[1:]: if arg.startswith('-'): if re.match('^-+debug', arg): debug = debug+1 dprint(1, 'debug %d' % debug) next else: if os.access(arg, os.R_OK): manpages.add(arg) dprint(1, 'manpages+= %s' % (arg)) else: dprint(0, 'skipping arg - not readable') dprint(2, 'manpages: %s' % manpages) for manpage in manpages: do_man_dir(manpage) dprint(1, '-----------------------------------------\n') dprint(1, '%d unsafe_types' % len(unsafe_types)) dprint(1, '%d unsafe_apis' % len(unsafe_apis)) dprint(1, 'type: %s' % type(unsafe_apis)) print('{\n # Types marked MT-Unsafe') # unsafe_types is not the whole of the list, # so the last item *is* followed by a comma: for u_type in sorted(unsafe_types): print(" '%s'," % u_type) print(' # APIs marked MT-Unsafe') # unsafe_apis completes the list, # so we ought to remove the last comma. for u_api in sorted(unsafe_apis): print(" '%s'," % u_api) print('}\n') # print(sorted(unsafe_apis), sep=',\n ', end='\n}\n')