donate-cpu: greatly improved `LibraryIncludes.get_libraries()` performance / some optimizations and cleanups (#4273)

* donate_cpu_lib.py: use `os.path.join()`

* donate-cpu: removed remaining usage of `os.chdir()`

* donate_cpu_lib.py: moved library includes code into class

* donate_cpu_lib.py: pre-compile library include regular expressions

* donate_cpu_lib.py: pre-compile some more regular expressions

* donate_cpu_lib.py: small unpack_package() cleanup and optimization

* donate_cpu_lib.py: added some information about the extracted files to unpack_package()

* donate_cpu_lib.py: bumped version

* added test_donate_cpu_lib.py

* donate_cpu_lib.py: greatly improved `LibraryIncludes.get_libraries()` performance

scan each file only once, and only for libraries that have not already been detected (see the sketch after this list)

* test_donate_cpu_lib.py: fix for Python 3.5

* scriptcheck.yml: added `-v` to pytest calls so we get the complete diff on assertions

* fixed `test_arguments_regression()` Python tests when additional pytest arguments are passed

* donate_cpu_lib.py: use `subprocess.check_call()`

* test_donate_cpu_lib.py: sort results to address differences in order with Python 3.5
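
For illustration, a minimal sketch of that single-pass idea (hypothetical helper names, not the actual donate_cpu_lib.py code): every file in the package is read once, and a library's pre-compiled include pattern is dropped from the search set as soon as it matches, so later files are only checked against the libraries that are still undetected.

import os
import re

def detect_libraries(path, include_patterns):
    # include_patterns: {library name: compiled regex matching that library's #include lines}
    remaining = dict(include_patterns)
    found = ['posix', 'gnu']  # always enabled, as in donate_cpu_lib.py
    for root, _, files in os.walk(path):
        if not remaining:
            break  # every known library already detected, nothing left to look for
        for name in files:
            try:
                with open(os.path.join(root, name), 'rt', errors='ignore') as f:
                    filedata = f.read()
            except IOError:
                continue
            # check this file only against the libraries that are still undetected
            for library in [lib for lib, rx in remaining.items() if rx.search(filedata)]:
                found.append(library)
                del remaining[library]
    return found

# example pattern, built the same way the real script builds its pre-compiled expressions
patterns = {'boost': re.compile('^[ \t]*#[ \t]*include[ \t]*(' + re.escape('<boost/') + ')', re.MULTILINE)}

The new test can be run locally the same way the added CI step does: python -m pytest -v tools/test_donate_cpu_lib.py with PYTHONPATH pointing at ./tools.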
Oliver Stöneberg 2022-07-13 21:09:29 +02:00 committed by GitHub
parent 05a1e92e35
commit bc58f55c6e
7 changed files with 211 additions and 119 deletions

.github/workflows/scriptcheck.yml

@@ -116,7 +116,7 @@ jobs:
       - name: test addons
         run: |
-          python -m pytest addons/test/test-*.py
+          python -m pytest -v addons/test/test-*.py
         env:
           PYTHONPATH: ./addons
@@ -128,7 +128,14 @@ jobs:
       - name: test reduce
         run: |
-          python -m pytest tools/test_reduce.py
+          python -m pytest -v tools/test_reduce.py
+        env:
+          PYTHONPATH: ./tools
+      - name: test donate_cpu_lib
+        if: matrix.python-version != '2.7'
+        run: |
+          python -m pytest -v tools/test_donate_cpu_lib.py
         env:
           PYTHONPATH: ./tools

addons/test/test-misra.py

@@ -138,6 +138,11 @@ def test_arguments_regression():
     from addons.misra import get_args_parser
+    # sys.argv contains all pytest arguments - so clear all existing arguments first and restore afterwards
+    sys_argv_old = sys.argv
+    sys.argv = [sys.argv[0]]
+    try:
         for arg in args_exit:
             sys.argv.append(arg)
             with pytest.raises(SystemExit):
@@ -153,3 +158,5 @@ def test_arguments_regression():
            except SystemExit:
                pytest.fail("Unexpected SystemExit with '%s'" % arg)
            sys.argv.remove(arg)
+    finally:
+        sys.argv = sys_argv_old

addons/test/test-y2038.py

@@ -115,6 +115,11 @@ def test_arguments_regression():
     from addons.y2038 import get_args_parser
+    # sys.argv contains all pytest arguments - so clear all existing arguments first and restore afterwards
+    sys_argv_old = sys.argv
+    sys.argv = [sys.argv[0]]
+    try:
         for arg in args_exit:
             sys.argv.append(arg)
             with pytest.raises(SystemExit):
@@ -130,3 +135,5 @@ def test_arguments_regression():
            except SystemExit:
                pytest.fail("Unexpected SystemExit with '%s'" % arg)
            sys.argv.remove(arg)
+    finally:
+        sys.argv = sys_argv_old

tools/donate-cpu.py

@@ -209,7 +209,7 @@ while True:
     head_timing_info = ''
     old_timing_info = ''
     cppcheck_head_info = ''
-    libraries = get_libraries(source_path)
+    libraries = library_includes.get_libraries(source_path)
     for ver in cppcheck_versions:
         tree_path = os.path.join(work_path, 'tree-'+ver)
@@ -218,7 +218,7 @@
             tree_path = os.path.join(work_path, 'tree-main')
             cppcheck_head_info = get_cppcheck_info(tree_path)
             capture_callstack = True
-        c, errout, info, t, cppcheck_options, timing_info = scan_package(work_path, tree_path, source_path, jobs, libraries, capture_callstack)
+        c, errout, info, t, cppcheck_options, timing_info = scan_package(tree_path, source_path, jobs, libraries, capture_callstack)
         if c < 0:
             if c == -101 and 'error: could not find or open any of the paths given.' in errout:
                 # No sourcefile found (for example only headers present)

tools/donate_cpu_lib.py

@@ -15,7 +15,7 @@ import shlex
 # Version scheme (MAJOR.MINOR.PATCH) should orientate on "Semantic Versioning" https://semver.org/
 # Every change in this script should result in increasing the version number accordingly (exceptions may be cosmetic
 # changes)
-CLIENT_VERSION = "1.3.26"
+CLIENT_VERSION = "1.3.27"
 # Timeout for analysis with Cppcheck in seconds
 CPPCHECK_TIMEOUT = 30 * 60
@@ -106,8 +106,7 @@ def checkout_cppcheck_version(repo_path, version, cppcheck_path):
 def get_cppcheck_info(cppcheck_path):
     try:
-        os.chdir(cppcheck_path)
-        return subprocess.check_output(['git', 'show', "--pretty=%h (%ci)", 'HEAD', '--no-patch', '--no-notes']).decode('utf-8').strip()
+        return subprocess.check_output(['git', 'show', "--pretty=%h (%ci)", 'HEAD', '--no-patch', '--no-notes'], cwd=cppcheck_path).decode('utf-8').strip()
     except:
         return ''
@@ -125,10 +124,9 @@ def compile_version(cppcheck_path, jobs):
 def compile_cppcheck(cppcheck_path, jobs):
     print('Compiling {}'.format(os.path.basename(cppcheck_path)))
     try:
-        os.chdir(cppcheck_path)
         if sys.platform == 'win32':
-            subprocess.call(['MSBuild.exe', cppcheck_path + '/cppcheck.sln', '/property:Configuration=Release', '/property:Platform=x64'])
-            subprocess.call([cppcheck_path + '/bin/cppcheck.exe', '--version'])
+            subprocess.check_call(['MSBuild.exe', os.path.join(cppcheck_path, 'cppcheck.sln'), '/property:Configuration=Release', '/property:Platform=x64'], cwd=cppcheck_path)
+            subprocess.check_call([os.path.join(cppcheck_path, 'bin', 'cppcheck.exe'), '--version'], cwd=cppcheck_path)
         else:
             subprocess.check_call(['make', jobs, 'MATCHCOMPILER=yes', 'CXXFLAGS=-O2 -g -w'], cwd=cppcheck_path)
             subprocess.check_call([os.path.join(cppcheck_path, 'cppcheck'), '--version'], cwd=cppcheck_path)
@@ -239,52 +237,51 @@ def unpack_package(work_path, tgz, cpp_only=False, skip_files=None):
     temp_path = os.path.join(work_path, 'temp')
     __remove_tree(temp_path)
     os.mkdir(temp_path)
-    found = False
-    if tarfile.is_tarfile(tgz):
-        with tarfile.open(tgz) as tf:
-            for member in tf:
     header_endings = ('.hpp', '.h++', '.hxx', '.hh', '.h')
     source_endings = ('.cpp', '.c++', '.cxx', '.cc', '.tpp', '.txx', '.ipp', '.ixx', '.qml')
     c_source_endings = ('.c',)
     if not cpp_only:
         source_endings = source_endings + c_source_endings
+    source_found = False
+    if tarfile.is_tarfile(tgz):
+        with tarfile.open(tgz) as tf:
+            total = 0
+            extracted = 0
+            skipped = 0
+            for member in tf:
+                total += 1
                 if member.name.startswith(('/', '..')):
                     # Skip dangerous file names
+                    # print('skipping dangerous file: ' + member.name)
+                    skipped += 1
                     continue
-                elif member.name.lower().endswith(header_endings + source_endings):
+                is_source = member.name.lower().endswith(source_endings)
+                if is_source or member.name.lower().endswith(header_endings):
                     if skip_files is not None:
                         skip = False
                         for skip_file in skip_files:
                             if member.name.endswith('/' + skip_file):
+                                # print('found file to skip: ' + member.name)
                                 skip = True
                                 break
                         if skip:
+                            skipped += 1
                             continue
                     try:
                         tf.extract(member.name, temp_path)
-                        if member.name.lower().endswith(source_endings):
-                            found = True
+                        if is_source:
+                            source_found = True
+                        extracted += 1
                     except OSError:
                         pass
                     except AttributeError:
                         pass
-    return temp_path, found
+    print('extracted {} of {} files (skipped {}{})'.format(extracted, total, skipped, (' / only headers' if (extracted and not source_found) else '')))
+    return temp_path, source_found
-def __has_include(path, includes):
-    re_includes = [re.escape(inc) for inc in includes]
-    re_expr = '^[ \t]*#[ \t]*include[ \t]*(' + '|'.join(re_includes) + ')'
-    for root, _, files in os.walk(path):
-        for name in files:
-            filename = os.path.join(root, name)
-            try:
-                with open(filename, 'rt', errors='ignore') as f:
-                    filedata = f.read()
-                    if re.search(re_expr, filedata, re.MULTILINE):
-                        return True
-            except IOError:
-                pass
-    return False
 def __run_command(cmd, print_cmd=True):
@@ -325,22 +322,21 @@
     return return_code, stdout, stderr, elapsed_time
-def scan_package(work_path, cppcheck_path, source_path, jobs, libraries, capture_callstack=True):
+def scan_package(cppcheck_path, source_path, jobs, libraries, capture_callstack=True):
     print('Analyze..')
-    os.chdir(work_path)
     libs = ''
     for library in libraries:
         if os.path.exists(os.path.join(cppcheck_path, 'cfg', library + '.cfg')):
             libs += '--library=' + library + ' '
-    dir_to_scan = os.path.basename(source_path)
+    dir_to_scan = source_path
     # Reference for GNU C: https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
     options = libs + ' --showtime=top5 --check-library --inconclusive --enable=style,information --inline-suppr --template=daca2'
     options += ' -D__GNUC__ --platform=unix64'
     options += ' -rp={}'.format(dir_to_scan)
     if sys.platform == 'win32':
-        cppcheck_cmd = cppcheck_path + '/bin/cppcheck.exe ' + options
+        cppcheck_cmd = os.path.join(cppcheck_path, 'bin', 'cppcheck.exe') + ' ' + options
         cmd = cppcheck_cmd + ' ' + jobs + ' ' + dir_to_scan
     else:
         cppcheck_cmd = os.path.join(cppcheck_path, 'cppcheck') + ' ' + options
@@ -352,12 +348,15 @@ def scan_package(work_path, cppcheck_path, source_path, jobs, libraries, capture_callstack=True):
     issue_messages_list = []
     internal_error_messages_list = []
     count = 0
+    re_obj = None
     for line in stderr.split('\n'):
         if ': information: ' in line:
             information_messages_list.append(line + '\n')
         elif line:
             issue_messages_list.append(line + '\n')
-            if re.match(r'.*:[0-9]+:.*\]$', line):
+            if re_obj is None:
+                re_obj = re.compile(r'.*:[0-9]+:.*\]$')
+            if re_obj.match(line):
                 count += 1
         if ': error: Internal error: ' in line:
             internal_error_messages_list.append(line + '\n')
@@ -454,8 +453,12 @@ def scan_package(work_path, cppcheck_path, source_path, jobs, libraries, capture_callstack=True):
 def __split_results(results):
     ret = []
     w = None
+    re_obj = None
     for line in results.split('\n'):
-        if line.endswith(']') and re.search(r': (error|warning|style|performance|portability|information|debug):', line):
+        if line.endswith(']'):
+            if re_obj is None:
+                re_obj = re.compile(r': (error|warning|style|performance|portability|information|debug):')
+            if re_obj.search(line):
                 if w is not None:
                     ret.append(w.strip())
                 w = ''
@@ -542,10 +545,10 @@ def upload_info(package, info_output, server_address):
     return False
-def get_libraries(folder):
-    libraries = ['posix', 'gnu']
-    library_includes = {'boost': ['<boost/'],
+class LibraryIncludes:
+    def __init__(self):
+        include_mappings = {'boost': ['<boost/'],
                             'bsd': ['<sys/queue.h>', '<sys/tree.h>', '<sys/uio.h>','<bsd/', '<fts.h>', '<db.h>', '<err.h>', '<vis.h>'],
                             'cairo': ['<cairo.h>'],
                             'cppunit': ['<cppunit/'],
                             'icu': ['<unicode/', '"unicode/'],
@@ -578,10 +581,43 @@
                             'wxwidgets': ['<wx/', '"wx/'],
                             'zlib': ['<zlib.h>'],
                             }
+        self.__library_includes_re = {}
+        for library, includes in include_mappings.items():
+            re_includes = [re.escape(inc) for inc in includes]
+            re_expr = '^[ \t]*#[ \t]*include[ \t]*(' + '|'.join(re_includes) + ')'
+            re_obj = re.compile(re_expr, re.MULTILINE)
+            self.__library_includes_re[library] = re_obj
+
+    def __iterate_files(self, path, has_include_cb):
+        for root, _, files in os.walk(path):
+            for name in files:
+                filename = os.path.join(root, name)
+                try:
+                    with open(filename, 'rt', errors='ignore') as f:
+                        filedata = f.read()
+                        has_include_cb(filedata)
+                except IOError:
+                    pass
+
+    def get_libraries(self, folder):
         print('Detecting library usage...')
-    for library, includes in library_includes.items():
-        if __has_include(folder, includes):
+        libraries = ['posix', 'gnu']
+
+        library_includes_re = self.__library_includes_re
+
+        def has_include(filedata):
+            lib_del = []
+            for library, includes_re in library_includes_re.items():
+                if includes_re.search(filedata):
                     libraries.append(library)
+                    lib_del.append(library)
+            for lib_d in lib_del:
+                del library_includes_re[lib_d]
+
+        self.__iterate_files(folder, has_include)
+
         print('Found libraries: {}'.format(libraries))
         return libraries
@@ -594,9 +630,10 @@ def get_compiler_version():
 my_script_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]
 jobs = '-j1'
 stop_time = None
-work_path = os.path.expanduser('~/cppcheck-' + my_script_name + '-workfolder')
+work_path = os.path.expanduser(os.path.join('~', 'cppcheck-' + my_script_name + '-workfolder'))
 package_url = None
 server_address = ('cppcheck1.osuosl.org', 8000)
 bandwidth_limit = None
 max_packages = None
 do_upload = True
+library_includes = LibraryIncludes()

tools/test-my-pr.py

@@ -132,8 +132,8 @@ if __name__ == "__main__":
         main_timeout = False
         your_timeout = False
-        libraries = lib.get_libraries(source_path)
-        c, errout, info, time_main, cppcheck_options, timing_info = lib.scan_package(work_path, main_dir, source_path, jobs, libraries)
+        libraries = lib.library_includes.get_libraries(source_path)
+        c, errout, info, time_main, cppcheck_options, timing_info = lib.scan_package(main_dir, source_path, jobs, libraries)
         if c < 0:
             if c == -101 and 'error: could not find or open any of the paths given.' in errout:
                 # No sourcefile found (for example only headers present)
@@ -146,7 +146,7 @@ if __name__ == "__main__":
                 main_crashed = True
             results_to_diff.append(errout)
-        c, errout, info, time_your, cppcheck_options, timing_info = lib.scan_package(work_path, your_repo_dir, source_path, jobs, libraries)
+        c, errout, info, time_your, cppcheck_options, timing_info = lib.scan_package(your_repo_dir, source_path, jobs, libraries)
         if c < 0:
             if c == -101 and 'error: could not find or open any of the paths given.' in errout:
                 # No sourcefile found (for example only headers present)

tools/test_donate_cpu_lib.py

@@ -0,0 +1,34 @@
#!/usr/bin/env python
#
# Cppcheck - A tool for static C/C++ code analysis
# Copyright (C) 2007-2022 Cppcheck team.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from donate_cpu_lib import *


def _test_library_includes(tmpdir, libs, content):
    library_includes = LibraryIncludes()

    src_file = os.path.join(str(tmpdir), "file.cpp")
    with open(src_file, 'w') as f:
        f.write(content)

    assert libs.sort() == library_includes.get_libraries(str(tmpdir)).sort()


def test_library_includes(tmpdir):
    _test_library_includes(tmpdir, ['posix', 'gnu'], '')
    _test_library_includes(tmpdir, ['posix', 'gnu'], '#include <stdio.h>')
    _test_library_includes(tmpdir, ['posix', 'gnu', 'boost'], '#include <boost/regex.hpp>')
    _test_library_includes(tmpdir, ['posix', 'gnu', 'python'], '#include "Python.h"')
    _test_library_includes(tmpdir, ['posix', 'gnu', 'lua', 'opengl', 'qt'], '#include <QApplication>\n#include <GL/gl.h>\n#include "lua.h"')