refactored reduce.py into a class and added unit test for it (#3791)

This commit is contained in:
Oliver Stöneberg 2022-03-19 19:42:44 +01:00 committed by GitHub
parent f907bba4e5
commit 8fc5c93803
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 389 additions and 296 deletions

View File

@ -121,6 +121,12 @@ jobs:
cd htmlreport
./check.sh
- name: test reduce
run: |
python -m pytest tools/test_reduce.py
env:
PYTHONPATH: ./tools
- name: dmake
if: matrix.python-version == '3.10'
run: |

View File

@ -3,326 +3,349 @@ import subprocess
import sys
import time
if sys.version_info[0] < 3:
class TimeoutExpired(Exception):
pass
else:
TimeoutExpired = subprocess.TimeoutExpired
def communicate(p, timeout=None, **kwargs):
if sys.version_info[0] < 3:
return p.communicate(**kwargs)
else:
return p.communicate(timeout=timeout)
class Reduce:
def __init__(self, cmd, expected, file, segfault=None):
if cmd is None:
raise RuntimeError('Abort: No --cmd')
# TODO: add --hang option to detect code which impacts the analysis time
def show_syntax():
print('Syntax:')
print(' reduce.py --cmd=<full command> --expected=<expected text output> --file=<source file> [--segfault]')
print('')
print("Example. source file = foo/bar.c")
print(" reduce.py --cmd='./cppcheck --enable=style foo/bar.c' --expected=\"Variable 'x' is reassigned\" --file=foo/bar.c")
sys.exit(1)
if not segfault and expected is None:
raise RuntimeError('Abort: No --expected')
if len(sys.argv) == 1:
show_syntax()
if file is None:
raise RuntimeError('Abort: No --file')
CMD = None
EXPECTED = None
SEGFAULT = False
FILE = None
ORGFILE = None
BACKUPFILE = None
TIMEOUTFILE = None
for arg in sys.argv[1:]:
if arg.startswith('--cmd='):
CMD = arg[arg.find('=') + 1:]
elif arg.startswith('--expected='):
EXPECTED = arg[arg.find('=') + 1:]
elif arg.startswith('--file='):
FILE = arg[arg.find('=') + 1:]
ORGFILE = FILE + '.org'
BACKUPFILE = FILE + '.bak'
TIMEOUTFILE = FILE + '.timeout'
elif arg == '--segfault':
SEGFAULT = True
# need to add '--error-exitcode=0' so detected issues will not be interpreted as a crash
if segfault and '--error-exitcode=0' not in cmd:
print("Adding '--error-exitcode=0' to --cmd")
self.__cmd = cmd + ' --error-exitcode=0'
else:
self.__cmd = cmd
self.__expected = expected
self.__file = file
self.__segfault = segfault
self.__origfile = self.__file + '.org'
self.__backupfile = self.__file + '.bak'
self.__timeoutfile = self.__file + '.timeout'
self.__elapsed_time = None
if CMD is None:
print('Abort: No --cmd')
show_syntax()
def print_info(self):
print('CMD=' + self.__cmd)
if self.__segfault:
print('EXPECTED=SEGFAULT')
else:
print('EXPECTED=' + self.__expected)
print('FILE=' + self.__file)
if not SEGFAULT and EXPECTED is None:
print('Abort: No --expected')
show_syntax()
def __communicate(self, p, timeout=None, **kwargs):
if sys.version_info[0] < 3:
return p.communicate(**kwargs)
else:
return p.communicate(timeout=timeout)
# need to add '--error-exitcode=0' so detected issues will not be interpreted as a crash
if SEGFAULT and not '--error-exitcode=0' in CMD:
print("Adding '--error-exitcode=0' to --cmd")
CMD = CMD + ' --error-exitcode=0'
def runtool(self, filedata=None):
if sys.version_info[0] < 3:
class TimeoutExpired(Exception):
pass
else:
TimeoutExpired = subprocess.TimeoutExpired
if FILE is None:
print('Abort: No --file')
show_syntax()
print('CMD=' + CMD)
if SEGFAULT:
print('EXPECTED=SEGFAULT')
else:
print('EXPECTED=' + EXPECTED)
print('FILE=' + FILE)
def runtool(filedata=None):
timeout = None
if elapsed_time:
timeout = elapsed_time * 2
p = subprocess.Popen(CMD.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
try:
comm = communicate(p, timeout=timeout)
except TimeoutExpired:
print('timeout')
p.kill()
p.communicate()
if filedata:
writefile(TIMEOUTFILE, filedata)
timeout = None
if self.__elapsed_time:
timeout = self.__elapsed_time * 2
p = subprocess.Popen(self.__cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
try:
comm = self.__communicate(p, timeout=timeout)
except TimeoutExpired:
print('timeout')
p.kill()
p.communicate()
if filedata:
self.writetimeoutfile(filedata)
return False
# print(p.returncode)
# print(comm)
if self.__segfault:
if p.returncode != 0:
return True
elif p.returncode == 0:
out = comm[0] + '\n' + comm[1]
if self.__expected in out:
return True
else:
# Something could be wrong, for example the command line for Cppcheck (CMD).
# Print the output to give a hint how to fix it.
print('Error: {}\n{}'.format(comm[0], comm[1]))
return False
#print(p.returncode)
#print(comm)
if SEGFAULT:
if p.returncode != 0:
def __writefile(self, filename, filedata):
f = open(filename, 'wt')
for line in filedata:
f.write(line)
f.close()
def replaceandrun(self, what, filedata, i, line):
print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..')
bak = filedata[i]
filedata[i] = line
self.writefile(filedata)
if self.runtool(filedata):
print('pass')
self.writebackupfile(filedata)
return True
elif p.returncode == 0:
out = comm[0] + '\n' + comm[1]
if EXPECTED in out:
return True
else:
# Something could be wrong, for example the command line for Cppcheck (CMD).
# Print the output to give a hint how to fix it.
print('Error: {}\n{}'.format(comm[0], comm[1]))
return False
def writefile(filename, filedata):
f = open(filename, 'wt')
for line in filedata:
f.write(line)
f.close()
def replaceandrun(what, filedata, i, line):
print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..')
bak = filedata[i]
filedata[i] = line
writefile(FILE, filedata)
if runtool(filedata):
print('pass')
writefile(BACKUPFILE, filedata)
return True
print('fail')
filedata[i] = bak
return False
def replaceandrun2(what, filedata, i, line1, line2):
print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..')
bak1 = filedata[i]
bak2 = filedata[i + 1]
filedata[i] = line1
filedata[i + 1] = line2
writefile(FILE, filedata)
if runtool(filedata):
print('pass')
writefile(BACKUPFILE, filedata)
else:
print('fail')
filedata[i] = bak1
filedata[i + 1] = bak2
filedata[i] = bak
return False
def replaceandrun2(self, what, filedata, i, line1, line2):
print(what + ' ' + str(i + 1) + '/' + str(len(filedata)) + '..')
bak1 = filedata[i]
bak2 = filedata[i + 1]
filedata[i] = line1
filedata[i + 1] = line2
self.writefile(filedata)
if self.runtool(filedata):
print('pass')
self.writebackupfile(filedata)
else:
print('fail')
filedata[i] = bak1
filedata[i + 1] = bak2
def clearandrun(what, filedata, i1, i2):
print(what + ' ' + str(i1 + 1) + '/' + str(len(filedata)) + '..')
filedata2 = list(filedata)
i = i1
while i <= i2 and i < len(filedata2):
filedata2[i] = ''
i = i + 1
writefile(FILE, filedata2)
if runtool(filedata2):
print('pass')
writefile(BACKUPFILE, filedata2)
return filedata2
print('fail')
return filedata
def removecomments(filedata):
for i in range(len(filedata)):
line = filedata[i]
if '//' in line:
replaceandrun('remove comment', filedata, i, line[:line.find('//')].rstrip() + '\n')
def checkpar(line):
par = 0
for c in line:
if c == '(' or c == '[':
par = par + 1
elif c == ')' or c == ']':
par = par - 1
if par < 0:
return False
return par == 0
def combinelines(filedata):
if len(filedata) < 3:
return
lines = []
for i in range(len(filedata) - 1):
fd1 = filedata[i].rstrip()
if fd1.endswith(','):
fd2 = filedata[i + 1].lstrip()
if fd2 != '':
lines.append(i)
chunksize = len(lines)
while chunksize > 10:
i = 0
while i < len(lines):
i1 = i
i2 = i + chunksize
i = i2
if i2 > len(lines):
i2 = len(lines)
filedata2 = list(filedata)
for line in lines[i1:i2]:
filedata2[line] = filedata2[line].rstrip() + filedata2[line + 1].lstrip()
filedata2[line + 1] = ''
if replaceandrun('combine lines', filedata2, lines[i1] + 1, ''):
filedata = filedata2
lines[i1:i2] = []
i = i1
chunksize = chunksize / 2
for line in lines:
fd1 = filedata[line].rstrip()
fd2 = filedata[line + 1].lstrip()
replaceandrun2('combine lines', filedata, line, fd1 + fd2, '')
def removedirectives(filedata):
for i in range(len(filedata)):
line = filedata[i].lstrip()
if line.startswith('#'):
# these cannot be removed on their own so skip them
if line.startswith('#if') or line.startswith('#endif') or line.startswith('#el'):
continue
replaceandrun('remove preprocessor directive', filedata, i, '')
def removeblocks(filedata):
if len(filedata) < 3:
def clearandrun(self, what, filedata, i1, i2):
print(what + ' ' + str(i1 + 1) + '/' + str(len(filedata)) + '..')
filedata2 = list(filedata)
i = i1
while i <= i2 and i < len(filedata2):
filedata2[i] = ''
i = i + 1
self.writefile(filedata2)
if self.runtool(filedata2):
print('pass')
self.writebackupfile(filedata2)
return filedata2
print('fail')
return filedata
for i in range(len(filedata)):
strippedline = filedata[i].strip()
if len(strippedline) == 0:
continue
if strippedline[-1] not in ';{}':
continue
def removecomments(self, filedata):
for i in range(len(filedata)):
line = filedata[i]
if '//' in line:
self.replaceandrun('remove comment', filedata, i, line[:line.find('//')].rstrip() + '\n')
i1 = i + 1
while i1 < len(filedata) and filedata[i1].startswith('#'):
i1 = i1 + 1
def checkpar(self, line):
par = 0
for c in line:
if c == '(' or c == '[':
par = par + 1
elif c == ')' or c == ']':
par = par - 1
if par < 0:
return False
return par == 0
i2 = i1
indent = 0
while i2 < len(filedata):
for c in filedata[i2]:
if c == '}':
indent = indent - 1
if indent == 0:
indent = -100
elif c == '{':
indent = indent + 1
if indent < 0:
break
i2 = i2 + 1
if indent == -100:
def combinelines(self, filedata):
if len(filedata) < 3:
return
lines = []
for i in range(len(filedata) - 1):
fd1 = filedata[i].rstrip()
if fd1.endswith(','):
fd2 = filedata[i + 1].lstrip()
if fd2 != '':
lines.append(i)
chunksize = len(lines)
while chunksize > 10:
i = 0
while i < len(lines):
i1 = i
i2 = i + chunksize
i = i2
if i2 > len(lines):
i2 = len(lines)
filedata2 = list(filedata)
for line in lines[i1:i2]:
filedata2[line] = filedata2[line].rstrip() + filedata2[line + 1].lstrip()
filedata2[line + 1] = ''
if self.replaceandrun('combine lines', filedata2, lines[i1] + 1, ''):
filedata = filedata2
lines[i1:i2] = []
i = i1
chunksize = chunksize / 2
for line in lines:
fd1 = filedata[line].rstrip()
fd2 = filedata[line + 1].lstrip()
self.replaceandrun2('combine lines', filedata, line, fd1 + fd2, '')
def removedirectives(self, filedata):
for i in range(len(filedata)):
line = filedata[i].lstrip()
if line.startswith('#'):
# these cannot be removed on their own so skip them
if line.startswith('#if') or line.startswith('#endif') or line.startswith('#el'):
continue
self.replaceandrun('remove preprocessor directive', filedata, i, '')
def removeblocks(self, filedata):
if len(filedata) < 3:
return filedata
for i in range(len(filedata)):
strippedline = filedata[i].strip()
if len(strippedline) == 0:
continue
if strippedline[-1] not in ';{}':
continue
i1 = i + 1
while i1 < len(filedata) and filedata[i1].startswith('#'):
i1 = i1 + 1
i2 = i1
indent = 0
if i2 == i1 or i2 >= len(filedata):
continue
if filedata[i2].strip() != '}' and filedata[i2].strip() != '};':
continue
if indent < 0:
i2 = i2 - 1
filedata = clearandrun('remove codeblock', filedata, i1, i2)
while i2 < len(filedata):
for c in filedata[i2]:
if c == '}':
indent = indent - 1
if indent == 0:
indent = -100
elif c == '{':
indent = indent + 1
if indent < 0:
break
i2 = i2 + 1
if indent == -100:
indent = 0
if i2 == i1 or i2 >= len(filedata):
continue
if filedata[i2].strip() != '}' and filedata[i2].strip() != '};':
continue
if indent < 0:
i2 = i2 - 1
filedata = self.clearandrun('remove codeblock', filedata, i1, i2)
return filedata
return filedata
def removeline(self, filedata):
stmt = True
for i in range(len(filedata)):
line = filedata[i]
strippedline = line.strip()
if len(strippedline) == 0:
continue
if stmt and strippedline[-1] == ';' and self.checkpar(line) and '{' not in line and '}' not in line:
self.replaceandrun('remove line', filedata, i, '')
elif stmt and '{' in strippedline and strippedline.find('}') == len(strippedline) - 1:
self.replaceandrun('remove line', filedata, i, '')
if strippedline[-1] in ';{}':
stmt = True
else:
stmt = False
def set_elapsed_time(self, elapsed_time):
self.__elapsed_time = elapsed_time
def writefile(self, filedata):
self.__writefile(self.__file, filedata)
def writeorigfile(self, filedata):
self.__writefile(self.__origfile, filedata)
def writebackupfile(self, filedata):
self.__writefile(self.__backupfile, filedata)
def writetimeoutfile(self, filedata):
self.__writefile(self.__timeoutfile, filedata)
def removeline(filedata):
stmt = True
for i in range(len(filedata)):
line = filedata[i]
strippedline = line.strip()
def main():
# TODO: add --hang option to detect code which impacts the analysis time
def show_syntax():
print('Syntax:')
print(' reduce.py --cmd=<full command> --expected=<expected text output> --file=<source file> [--segfault]')
print('')
print("Example. source file = foo/bar.c")
print(
" reduce.py --cmd='./cppcheck --enable=style foo/bar.c' --expected=\"Variable 'x' is reassigned\" --file=foo/bar.c")
sys.exit(1)
if len(strippedline) == 0:
continue
if len(sys.argv) == 1:
show_syntax()
if stmt and strippedline[-1] == ';' and checkpar(line) and '{' not in line and '}' not in line:
replaceandrun('remove line', filedata, i, '')
arg_cmd = None
arg_expected = None
arg_file = None
arg_segfault = False
elif stmt and '{' in strippedline and strippedline.find('}') == len(strippedline) - 1:
replaceandrun('remove line', filedata, i, '')
for arg in sys.argv[1:]:
if arg.startswith('--cmd='):
arg_cmd = arg[arg.find('=') + 1:]
elif arg.startswith('--expected='):
arg_expected = arg[arg.find('=') + 1:]
elif arg.startswith('--file='):
arg_file = arg[arg.find('=') + 1:]
elif arg == '--segfault':
arg_segfault = True
if strippedline[-1] in ';{}':
stmt = True
else:
stmt = False
try:
reduce = Reduce(arg_cmd, arg_expected, arg_file, arg_segfault)
except RuntimeError as e:
print(e)
show_syntax()
reduce.print_info()
# reduce..
print('Make sure error can be reproduced...')
t = time.time()
if not reduce.runtool():
print("Cannot reproduce")
sys.exit(1)
elapsed_time = time.time() - t
reduce.set_elapsed_time(elapsed_time)
print('elapsed_time: {}'.format(elapsed_time))
with open(arg_file, 'rt') as f:
filedata = f.readlines()
reduce.writeorigfile(filedata)
while True:
filedata1 = list(filedata)
print('remove preprocessor directives...')
reduce.removedirectives(filedata)
print('remove blocks...')
filedata = reduce.removeblocks(filedata)
print('remove comments...')
reduce.removecomments(filedata)
print('combine lines..')
reduce.combinelines(filedata)
print('remove line...')
reduce.removeline(filedata)
# if filedata and filedata2 are identical then stop
if filedata1 == filedata:
break
reduce.writefile(filedata)
print('DONE')
# reduce..
print('Make sure error can be reproduced...')
elapsed_time = None
t = time.time()
if not runtool():
print("Cannot reproduce")
sys.exit(1)
elapsed_time = time.time() - t
print('elapsed_time: {}'.format(elapsed_time))
f = open(FILE, 'rt')
filedata = f.readlines()
f.close()
writefile(ORGFILE, filedata)
while True:
filedata1 = list(filedata)
print('remove preprocessor directives...')
removedirectives(filedata)
print('remove blocks...')
filedata = removeblocks(filedata)
print('remove comments...')
removecomments(filedata)
print('combine lines..')
combinelines(filedata)
print('remove line...')
removeline(filedata)
# if filedata and filedata2 are identical then stop
if filedata1 == filedata:
break
writefile(FILE, filedata)
print('DONE')
if __name__ == '__main__':
main()

64
tools/test_reduce.py Normal file
View File

@ -0,0 +1,64 @@
#!/usr/bin/env python
from reduce import Reduce
class ReduceTest(Reduce):
def __init__(self):
# we do not want the super __init__ to be called
# super().__init__('', '', '')
pass
def runtool(self, filedata=None):
return True
def writefile(self, filedata):
pass
def writebackupfile(self, filedata):
pass
def test_removecomments():
"""make sure we keep the \n when removing a comment at the end of a line"""
reduce = ReduceTest()
filedata = [
'int i; // some integer\n',
'int j;\n'
]
expected = [
'int i;\n',
'int j;\n'
]
reduce.removecomments(filedata)
assert filedata == expected
def test_removedirectives():
"""do not remove any of the #if*, #el* or #endif directives on their own"""
reduce = ReduceTest()
filedata = [
'#if 0\n',
'#else\n',
'#endif\n',
'#ifdef DEF\n',
'#elif 0\n'
'#endif\n'
]
expected = [
'#if 0\n',
'#else\n',
'#endif\n',
'#ifdef DEF\n',
'#elif 0\n'
'#endif\n'
]
reduce.removedirectives(filedata)
assert filedata == expected