From 8fc5c938033e6bbf8b19458574aa0c54da565e3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20St=C3=B6neberg?= Date: Sat, 19 Mar 2022 19:42:44 +0100 Subject: [PATCH] refactored reduce.py into a class and added unit test for it (#3791) --- .github/workflows/scriptcheck.yml | 6 + tools/reduce.py | 615 ++++++++++++++++-------------- tools/test_reduce.py | 64 ++++ 3 files changed, 389 insertions(+), 296 deletions(-) create mode 100644 tools/test_reduce.py diff --git a/.github/workflows/scriptcheck.yml b/.github/workflows/scriptcheck.yml index 930eae9ba..0b6b93cd3 100644 --- a/.github/workflows/scriptcheck.yml +++ b/.github/workflows/scriptcheck.yml @@ -121,6 +121,12 @@ jobs: cd htmlreport ./check.sh + - name: test reduce + run: | + python -m pytest tools/test_reduce.py + env: + PYTHONPATH: ./tools + - name: dmake if: matrix.python-version == '3.10' run: | diff --git a/tools/reduce.py b/tools/reduce.py index ed2e17321..77e4a638e 100755 --- a/tools/reduce.py +++ b/tools/reduce.py @@ -3,326 +3,349 @@ import subprocess import sys import time -if sys.version_info[0] < 3: - class TimeoutExpired(Exception): - pass -else: - TimeoutExpired = subprocess.TimeoutExpired -def communicate(p, timeout=None, **kwargs): - if sys.version_info[0] < 3: - return p.communicate(**kwargs) - else: - return p.communicate(timeout=timeout) +class Reduce: + def __init__(self, cmd, expected, file, segfault=None): + if cmd is None: + raise RuntimeError('Abort: No --cmd') -# TODO: add --hang option to detect code which impacts the analysis time -def show_syntax(): - print('Syntax:') - print(' reduce.py --cmd= --expected= --file= [--segfault]') - print('') - print("Example. source file = foo/bar.c") - print(" reduce.py --cmd='./cppcheck --enable=style foo/bar.c' --expected=\"Variable 'x' is reassigned\" --file=foo/bar.c") - sys.exit(1) + if not segfault and expected is None: + raise RuntimeError('Abort: No --expected') -if len(sys.argv) == 1: - show_syntax() + if file is None: + raise RuntimeError('Abort: No --file') -CMD = None -EXPECTED = None -SEGFAULT = False -FILE = None -ORGFILE = None -BACKUPFILE = None -TIMEOUTFILE = None -for arg in sys.argv[1:]: - if arg.startswith('--cmd='): - CMD = arg[arg.find('=') + 1:] - elif arg.startswith('--expected='): - EXPECTED = arg[arg.find('=') + 1:] - elif arg.startswith('--file='): - FILE = arg[arg.find('=') + 1:] - ORGFILE = FILE + '.org' - BACKUPFILE = FILE + '.bak' - TIMEOUTFILE = FILE + '.timeout' - elif arg == '--segfault': - SEGFAULT = True + # need to add '--error-exitcode=0' so detected issues will not be interpreted as a crash + if segfault and '--error-exitcode=0' not in cmd: + print("Adding '--error-exitcode=0' to --cmd") + self.__cmd = cmd + ' --error-exitcode=0' + else: + self.__cmd = cmd + self.__expected = expected + self.__file = file + self.__segfault = segfault + self.__origfile = self.__file + '.org' + self.__backupfile = self.__file + '.bak' + self.__timeoutfile = self.__file + '.timeout' + self.__elapsed_time = None -if CMD is None: - print('Abort: No --cmd') - show_syntax() + def print_info(self): + print('CMD=' + self.__cmd) + if self.__segfault: + print('EXPECTED=SEGFAULT') + else: + print('EXPECTED=' + self.__expected) + print('FILE=' + self.__file) -if not SEGFAULT and EXPECTED is None: - print('Abort: No --expected') - show_syntax() + def __communicate(self, p, timeout=None, **kwargs): + if sys.version_info[0] < 3: + return p.communicate(**kwargs) + else: + return p.communicate(timeout=timeout) -# need to add '--error-exitcode=0' so detected issues will not be interpreted as a crash -if SEGFAULT and not '--error-exitcode=0' in CMD: - print("Adding '--error-exitcode=0' to --cmd") - CMD = CMD + ' --error-exitcode=0' + def runtool(self, filedata=None): + if sys.version_info[0] < 3: + class TimeoutExpired(Exception): + pass + else: + TimeoutExpired = subprocess.TimeoutExpired -if FILE is None: - print('Abort: No --file') - show_syntax() - -print('CMD=' + CMD) -if SEGFAULT: - print('EXPECTED=SEGFAULT') -else: - print('EXPECTED=' + EXPECTED) -print('FILE=' + FILE) - -def runtool(filedata=None): - timeout = None - if elapsed_time: - timeout = elapsed_time * 2 - p = subprocess.Popen(CMD.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) - try: - comm = communicate(p, timeout=timeout) - except TimeoutExpired: - print('timeout') - p.kill() - p.communicate() - if filedata: - writefile(TIMEOUTFILE, filedata) + timeout = None + if self.__elapsed_time: + timeout = self.__elapsed_time * 2 + p = subprocess.Popen(self.__cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + try: + comm = self.__communicate(p, timeout=timeout) + except TimeoutExpired: + print('timeout') + p.kill() + p.communicate() + if filedata: + self.writetimeoutfile(filedata) + return False + # print(p.returncode) + # print(comm) + if self.__segfault: + if p.returncode != 0: + return True + elif p.returncode == 0: + out = comm[0] + '\n' + comm[1] + if self.__expected in out: + return True + else: + # Something could be wrong, for example the command line for Cppcheck (CMD). + # Print the output to give a hint how to fix it. + print('Error: {}\n{}'.format(comm[0], comm[1])) return False - #print(p.returncode) - #print(comm) - if SEGFAULT: - if p.returncode != 0: + + def __writefile(self, filename, filedata): + f = open(filename, 'wt') + for line in filedata: + f.write(line) + f.close() + + def replaceandrun(self, what, filedata, i, line): + print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..') + bak = filedata[i] + filedata[i] = line + self.writefile(filedata) + if self.runtool(filedata): + print('pass') + self.writebackupfile(filedata) return True - elif p.returncode == 0: - out = comm[0] + '\n' + comm[1] - if EXPECTED in out: - return True - else: - # Something could be wrong, for example the command line for Cppcheck (CMD). - # Print the output to give a hint how to fix it. - print('Error: {}\n{}'.format(comm[0], comm[1])) - return False - - -def writefile(filename, filedata): - f = open(filename, 'wt') - for line in filedata: - f.write(line) - f.close() - - -def replaceandrun(what, filedata, i, line): - print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..') - bak = filedata[i] - filedata[i] = line - writefile(FILE, filedata) - if runtool(filedata): - print('pass') - writefile(BACKUPFILE, filedata) - return True - print('fail') - filedata[i] = bak - return False - - -def replaceandrun2(what, filedata, i, line1, line2): - print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..') - bak1 = filedata[i] - bak2 = filedata[i + 1] - filedata[i] = line1 - filedata[i + 1] = line2 - writefile(FILE, filedata) - if runtool(filedata): - print('pass') - writefile(BACKUPFILE, filedata) - else: print('fail') - filedata[i] = bak1 - filedata[i + 1] = bak2 + filedata[i] = bak + return False + def replaceandrun2(self, what, filedata, i, line1, line2): + print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..') + bak1 = filedata[i] + bak2 = filedata[i + 1] + filedata[i] = line1 + filedata[i + 1] = line2 + self.writefile(filedata) + if self.runtool(filedata): + print('pass') + self.writebackupfile(filedata) + else: + print('fail') + filedata[i] = bak1 + filedata[i + 1] = bak2 -def clearandrun(what, filedata, i1, i2): - print(what + ' ' + str(i1 + 1) + '/' + str(len(filedata)) + '..') - filedata2 = list(filedata) - i = i1 - while i <= i2 and i < len(filedata2): - filedata2[i] = '' - i = i + 1 - writefile(FILE, filedata2) - if runtool(filedata2): - print('pass') - writefile(BACKUPFILE, filedata2) - return filedata2 - print('fail') - return filedata - - -def removecomments(filedata): - for i in range(len(filedata)): - line = filedata[i] - if '//' in line: - replaceandrun('remove comment', filedata, i, line[:line.find('//')].rstrip() + '\n') - - -def checkpar(line): - par = 0 - for c in line: - if c == '(' or c == '[': - par = par + 1 - elif c == ')' or c == ']': - par = par - 1 - if par < 0: - return False - return par == 0 - - -def combinelines(filedata): - if len(filedata) < 3: - return - - lines = [] - - for i in range(len(filedata) - 1): - fd1 = filedata[i].rstrip() - if fd1.endswith(','): - fd2 = filedata[i + 1].lstrip() - if fd2 != '': - lines.append(i) - - chunksize = len(lines) - while chunksize > 10: - i = 0 - while i < len(lines): - i1 = i - i2 = i + chunksize - i = i2 - if i2 > len(lines): - i2 = len(lines) - - filedata2 = list(filedata) - for line in lines[i1:i2]: - filedata2[line] = filedata2[line].rstrip() + filedata2[line + 1].lstrip() - filedata2[line + 1] = '' - - if replaceandrun('combine lines', filedata2, lines[i1] + 1, ''): - filedata = filedata2 - lines[i1:i2] = [] - i = i1 - - chunksize = chunksize / 2 - - for line in lines: - fd1 = filedata[line].rstrip() - fd2 = filedata[line + 1].lstrip() - replaceandrun2('combine lines', filedata, line, fd1 + fd2, '') - - -def removedirectives(filedata): - for i in range(len(filedata)): - line = filedata[i].lstrip() - if line.startswith('#'): - # these cannot be removed on their own so skip them - if line.startswith('#if') or line.startswith('#endif') or line.startswith('#el'): - continue - replaceandrun('remove preprocessor directive', filedata, i, '') - - -def removeblocks(filedata): - if len(filedata) < 3: + def clearandrun(self, what, filedata, i1, i2): + print(what + ' ' + str(i1 + 1) + '/' + str(len(filedata)) + '..') + filedata2 = list(filedata) + i = i1 + while i <= i2 and i < len(filedata2): + filedata2[i] = '' + i = i + 1 + self.writefile(filedata2) + if self.runtool(filedata2): + print('pass') + self.writebackupfile(filedata2) + return filedata2 + print('fail') return filedata - for i in range(len(filedata)): - strippedline = filedata[i].strip() - if len(strippedline) == 0: - continue - if strippedline[-1] not in ';{}': - continue + def removecomments(self, filedata): + for i in range(len(filedata)): + line = filedata[i] + if '//' in line: + self.replaceandrun('remove comment', filedata, i, line[:line.find('//')].rstrip() + '\n') - i1 = i + 1 - while i1 < len(filedata) and filedata[i1].startswith('#'): - i1 = i1 + 1 + def checkpar(self, line): + par = 0 + for c in line: + if c == '(' or c == '[': + par = par + 1 + elif c == ')' or c == ']': + par = par - 1 + if par < 0: + return False + return par == 0 - i2 = i1 - indent = 0 - while i2 < len(filedata): - for c in filedata[i2]: - if c == '}': - indent = indent - 1 - if indent == 0: - indent = -100 - elif c == '{': - indent = indent + 1 - if indent < 0: - break - i2 = i2 + 1 - if indent == -100: + def combinelines(self, filedata): + if len(filedata) < 3: + return + + lines = [] + + for i in range(len(filedata) - 1): + fd1 = filedata[i].rstrip() + if fd1.endswith(','): + fd2 = filedata[i + 1].lstrip() + if fd2 != '': + lines.append(i) + + chunksize = len(lines) + while chunksize > 10: + i = 0 + while i < len(lines): + i1 = i + i2 = i + chunksize + i = i2 + if i2 > len(lines): + i2 = len(lines) + + filedata2 = list(filedata) + for line in lines[i1:i2]: + filedata2[line] = filedata2[line].rstrip() + filedata2[line + 1].lstrip() + filedata2[line + 1] = '' + + if self.replaceandrun('combine lines', filedata2, lines[i1] + 1, ''): + filedata = filedata2 + lines[i1:i2] = [] + i = i1 + + chunksize = chunksize / 2 + + for line in lines: + fd1 = filedata[line].rstrip() + fd2 = filedata[line + 1].lstrip() + self.replaceandrun2('combine lines', filedata, line, fd1 + fd2, '') + + def removedirectives(self, filedata): + for i in range(len(filedata)): + line = filedata[i].lstrip() + if line.startswith('#'): + # these cannot be removed on their own so skip them + if line.startswith('#if') or line.startswith('#endif') or line.startswith('#el'): + continue + self.replaceandrun('remove preprocessor directive', filedata, i, '') + + def removeblocks(self, filedata): + if len(filedata) < 3: + return filedata + + for i in range(len(filedata)): + strippedline = filedata[i].strip() + if len(strippedline) == 0: + continue + if strippedline[-1] not in ';{}': + continue + + i1 = i + 1 + while i1 < len(filedata) and filedata[i1].startswith('#'): + i1 = i1 + 1 + + i2 = i1 indent = 0 - if i2 == i1 or i2 >= len(filedata): - continue - if filedata[i2].strip() != '}' and filedata[i2].strip() != '};': - continue - if indent < 0: - i2 = i2 - 1 - filedata = clearandrun('remove codeblock', filedata, i1, i2) + while i2 < len(filedata): + for c in filedata[i2]: + if c == '}': + indent = indent - 1 + if indent == 0: + indent = -100 + elif c == '{': + indent = indent + 1 + if indent < 0: + break + i2 = i2 + 1 + if indent == -100: + indent = 0 + if i2 == i1 or i2 >= len(filedata): + continue + if filedata[i2].strip() != '}' and filedata[i2].strip() != '};': + continue + if indent < 0: + i2 = i2 - 1 + filedata = self.clearandrun('remove codeblock', filedata, i1, i2) - return filedata + return filedata + + def removeline(self, filedata): + stmt = True + for i in range(len(filedata)): + line = filedata[i] + strippedline = line.strip() + + if len(strippedline) == 0: + continue + + if stmt and strippedline[-1] == ';' and self.checkpar(line) and '{' not in line and '}' not in line: + self.replaceandrun('remove line', filedata, i, '') + + elif stmt and '{' in strippedline and strippedline.find('}') == len(strippedline) - 1: + self.replaceandrun('remove line', filedata, i, '') + + if strippedline[-1] in ';{}': + stmt = True + else: + stmt = False + + def set_elapsed_time(self, elapsed_time): + self.__elapsed_time = elapsed_time + + def writefile(self, filedata): + self.__writefile(self.__file, filedata) + + def writeorigfile(self, filedata): + self.__writefile(self.__origfile, filedata) + + def writebackupfile(self, filedata): + self.__writefile(self.__backupfile, filedata) + + def writetimeoutfile(self, filedata): + self.__writefile(self.__timeoutfile, filedata) -def removeline(filedata): - stmt = True - for i in range(len(filedata)): - line = filedata[i] - strippedline = line.strip() +def main(): + # TODO: add --hang option to detect code which impacts the analysis time + def show_syntax(): + print('Syntax:') + print(' reduce.py --cmd= --expected= --file= [--segfault]') + print('') + print("Example. source file = foo/bar.c") + print( + " reduce.py --cmd='./cppcheck --enable=style foo/bar.c' --expected=\"Variable 'x' is reassigned\" --file=foo/bar.c") + sys.exit(1) - if len(strippedline) == 0: - continue + if len(sys.argv) == 1: + show_syntax() - if stmt and strippedline[-1] == ';' and checkpar(line) and '{' not in line and '}' not in line: - replaceandrun('remove line', filedata, i, '') + arg_cmd = None + arg_expected = None + arg_file = None + arg_segfault = False - elif stmt and '{' in strippedline and strippedline.find('}') == len(strippedline) - 1: - replaceandrun('remove line', filedata, i, '') + for arg in sys.argv[1:]: + if arg.startswith('--cmd='): + arg_cmd = arg[arg.find('=') + 1:] + elif arg.startswith('--expected='): + arg_expected = arg[arg.find('=') + 1:] + elif arg.startswith('--file='): + arg_file = arg[arg.find('=') + 1:] + elif arg == '--segfault': + arg_segfault = True - if strippedline[-1] in ';{}': - stmt = True - else: - stmt = False + try: + reduce = Reduce(arg_cmd, arg_expected, arg_file, arg_segfault) + except RuntimeError as e: + print(e) + show_syntax() + + reduce.print_info() + + # reduce.. + print('Make sure error can be reproduced...') + t = time.time() + if not reduce.runtool(): + print("Cannot reproduce") + sys.exit(1) + elapsed_time = time.time() - t + reduce.set_elapsed_time(elapsed_time) + print('elapsed_time: {}'.format(elapsed_time)) + + with open(arg_file, 'rt') as f: + filedata = f.readlines() + + reduce.writeorigfile(filedata) + + while True: + filedata1 = list(filedata) + + print('remove preprocessor directives...') + reduce.removedirectives(filedata) + + print('remove blocks...') + filedata = reduce.removeblocks(filedata) + + print('remove comments...') + reduce.removecomments(filedata) + + print('combine lines..') + reduce.combinelines(filedata) + + print('remove line...') + reduce.removeline(filedata) + + # if filedata and filedata2 are identical then stop + if filedata1 == filedata: + break + + reduce.writefile(filedata) + print('DONE') -# reduce.. -print('Make sure error can be reproduced...') -elapsed_time = None -t = time.time() -if not runtool(): - print("Cannot reproduce") - sys.exit(1) -elapsed_time = time.time() - t -print('elapsed_time: {}'.format(elapsed_time)) - -f = open(FILE, 'rt') -filedata = f.readlines() -f.close() - -writefile(ORGFILE, filedata) - -while True: - filedata1 = list(filedata) - - print('remove preprocessor directives...') - removedirectives(filedata) - - print('remove blocks...') - filedata = removeblocks(filedata) - - print('remove comments...') - removecomments(filedata) - - print('combine lines..') - combinelines(filedata) - - print('remove line...') - removeline(filedata) - - # if filedata and filedata2 are identical then stop - if filedata1 == filedata: - break - -writefile(FILE, filedata) -print('DONE') +if __name__ == '__main__': + main() diff --git a/tools/test_reduce.py b/tools/test_reduce.py new file mode 100644 index 000000000..1e0674c68 --- /dev/null +++ b/tools/test_reduce.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +from reduce import Reduce + + +class ReduceTest(Reduce): + def __init__(self): + # we do not want the super __init__ to be called + # super().__init__('', '', '') + pass + + def runtool(self, filedata=None): + return True + + def writefile(self, filedata): + pass + + def writebackupfile(self, filedata): + pass + + +def test_removecomments(): + """make sure we keep the \n when removing a comment at the end of a line""" + + reduce = ReduceTest() + + filedata = [ + 'int i; // some integer\n', + 'int j;\n' + ] + + expected = [ + 'int i;\n', + 'int j;\n' + ] + + reduce.removecomments(filedata) + assert filedata == expected + + +def test_removedirectives(): + """do not remove any of the #if*, #el* or #endif directives on their own""" + + reduce = ReduceTest() + + filedata = [ + '#if 0\n', + '#else\n', + '#endif\n', + '#ifdef DEF\n', + '#elif 0\n' + '#endif\n' + ] + + expected = [ + '#if 0\n', + '#else\n', + '#endif\n', + '#ifdef DEF\n', + '#elif 0\n' + '#endif\n' + ] + + reduce.removedirectives(filedata) + assert filedata == expected