Match compiler: Collect and cache plain C-strings
Profiler runs with gperftools showed we were spending a lot of time in strlen(), even with compiled match patterns. If you compare a std::string with a plain C-string, the comparison operator needs to calculate the length of the C-string via strlen(). We can avoid this by turning all C-strings into C++ strings, which cache the string length internally. The match compiler has been adapted to collect all C-strings during compilation and aggregate them. Benchmark on the 'rpm' code base on a Core i7 920 box: cppcheck without matchcompiler: real 0m30.977s; cppcheck with previous matchcompiler: real 0m28.157s; cppcheck with cached C-strings: real 0m17.823s.
This commit is contained in:
parent
fe5ee76165
commit
3c915b534d
|
@ -3,7 +3,17 @@
|
||||||
import re
|
import re
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
def compileCmd(tok):
|
def insertMatchStr(matchStrs, look_for):
    """Return the generated identifier that refers to the string *look_for*.

    *matchStrs* maps each string seen so far to a 1-based sequence
    number. If *look_for* is not present yet, it is added (mutating
    *matchStrs* in place) with the next free number, i.e. the current
    number of entries plus one. Repeated calls with the same string
    return the same identifier.

    The result is the text 'matchStr' followed by that number, e.g.
    'matchStr1'.
    """
    prefix = 'matchStr'

    # setdefault() registers the string only when it is missing and hands
    # back the stored number either way, so one lookup is enough.
    index = matchStrs.setdefault(look_for, len(matchStrs) + 1)

    return prefix + str(index)
|
||||||
|
|
||||||
|
def compileCmd(tok, matchStrs):
|
||||||
if tok == '%any%':
|
if tok == '%any%':
|
||||||
return 'true'
|
return 'true'
|
||||||
elif tok == '%bool%':
|
elif tok == '%bool%':
|
||||||
|
@ -17,22 +27,23 @@ def compileCmd(tok):
|
||||||
elif tok == '%op%':
|
elif tok == '%op%':
|
||||||
return 'tok->isOp()'
|
return 'tok->isOp()'
|
||||||
elif tok == '%or%':
|
elif tok == '%or%':
|
||||||
return '(tok->str()=="|")'
|
return '(tok->str()==' + insertMatchStr(matchStrs, '|') + ')/* | */'
|
||||||
elif tok == '%oror%':
|
elif tok == '%oror%':
|
||||||
return '(tok->str()=="||")'
|
return '(tok->str()==' + insertMatchStr(matchStrs, '||') + ')/* || */'
|
||||||
elif tok == '%str%':
|
elif tok == '%str%':
|
||||||
return '(tok->type()==Token::eString)'
|
return '(tok->type()==Token::eString)'
|
||||||
elif tok == '%type%':
|
elif tok == '%type%':
|
||||||
return '(tok->isName() && tok->varId()==0U && tok->str() != "delete")'
|
return '(tok->isName() && tok->varId()==0U && tok->str() != ' + insertMatchStr(matchStrs, 'delete') + '/* delete */)'
|
||||||
elif tok == '%var%':
|
elif tok == '%var%':
|
||||||
return 'tok->isName()'
|
return 'tok->isName()'
|
||||||
elif tok == '%varid%':
|
elif tok == '%varid%':
|
||||||
return '(tok->isName() && tok->varId()==varid)'
|
return '(tok->isName() && tok->varId()==varid)'
|
||||||
elif (len(tok)>2) and (tok[0]=="%"):
|
elif (len(tok)>2) and (tok[0]=="%"):
|
||||||
print ("unhandled:" + tok)
|
print ("unhandled:" + tok)
|
||||||
return '(tok->str()=="'+tok+'")'
|
|
||||||
|
|
||||||
def compilePattern(pattern, nr, varid):
|
return '(tok->str()==' + insertMatchStr(matchStrs, tok) + ')/* ' + tok + ' */'
|
||||||
|
|
||||||
|
def compilePattern(matchStrs, pattern, nr, varid):
|
||||||
arg2 = ''
|
arg2 = ''
|
||||||
if varid:
|
if varid:
|
||||||
arg2 = ', const unsigned int varid'
|
arg2 = ', const unsigned int varid'
|
||||||
|
@ -76,7 +87,7 @@ def compilePattern(pattern, nr, varid):
|
||||||
if not first:
|
if not first:
|
||||||
ret = ret + logicalOp
|
ret = ret + logicalOp
|
||||||
first = False
|
first = False
|
||||||
ret = ret + neg + compileCmd(tok2)
|
ret = ret + neg + compileCmd(tok2, matchStrs)
|
||||||
|
|
||||||
if "" in tokens2:
|
if "" in tokens2:
|
||||||
ret = ret + '))\n'
|
ret = ret + '))\n'
|
||||||
|
@ -88,14 +99,15 @@ def compilePattern(pattern, nr, varid):
|
||||||
|
|
||||||
# !!a
|
# !!a
|
||||||
elif tok[0:2]=="!!":
|
elif tok[0:2]=="!!":
|
||||||
ret = ret + ' if (tok && tok->str() == "' + tok[2:] + '")\n'
|
ret = ret + ' if (tok && tok->str() == ' + insertMatchStr(matchStrs, tok[2:]) + ')/* ' + tok[2:] + ' */\n'
|
||||||
ret = ret + ' return false;\n'
|
ret = ret + ' return false;\n'
|
||||||
gotoNextToken = ' tok = tok ? tok->next() : NULL;\n'
|
gotoNextToken = ' tok = tok ? tok->next() : NULL;\n'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
ret = ret + ' if (!tok || !' + compileCmd(tok) + ')\n'
|
ret = ret + ' if (!tok || !' + compileCmd(tok, matchStrs) + ')\n'
|
||||||
ret = ret + ' return false;\n'
|
ret = ret + ' return false;\n'
|
||||||
ret = ret + ' return true;\n}\n'
|
ret = ret + ' return true;\n}\n'
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def parseMatch(line, pos1):
|
def parseMatch(line, pos1):
|
||||||
|
@ -137,13 +149,14 @@ def convertFile(srcname, destname):
|
||||||
srclines = fin.readlines()
|
srclines = fin.readlines()
|
||||||
fin.close()
|
fin.close()
|
||||||
|
|
||||||
|
header = '#include "token.h"\n'
|
||||||
|
header += '#include "errorlogger.h"\n'
|
||||||
|
header += '#include <string>\n'
|
||||||
|
header += '#include <cstring>\n'
|
||||||
matchfunctions = ''
|
matchfunctions = ''
|
||||||
matchfunctions = matchfunctions + '#include "token.h"\n'
|
|
||||||
matchfunctions = matchfunctions + '#include "errorlogger.h"\n'
|
|
||||||
matchfunctions = matchfunctions + '#include <string>\n'
|
|
||||||
matchfunctions = matchfunctions + '#include <cstring>\n'
|
|
||||||
code = ''
|
code = ''
|
||||||
|
|
||||||
|
matchStrs = {}
|
||||||
patternNumber = 1
|
patternNumber = 1
|
||||||
for line in srclines:
|
for line in srclines:
|
||||||
while True:
|
while True:
|
||||||
|
@ -175,13 +188,18 @@ def convertFile(srcname, destname):
|
||||||
if arg3:
|
if arg3:
|
||||||
a3 = ',' + arg3
|
a3 = ',' + arg3
|
||||||
line = line[:pos1]+'match'+str(patternNumber)+'('+arg1+a3+')'+line[pos1+len(g0):]
|
line = line[:pos1]+'match'+str(patternNumber)+'('+arg1+a3+')'+line[pos1+len(g0):]
|
||||||
matchfunctions = matchfunctions + compilePattern(arg2, patternNumber, arg3)
|
matchfunctions = matchfunctions + compilePattern(matchStrs, arg2, patternNumber, arg3)
|
||||||
patternNumber = patternNumber + 1
|
patternNumber = patternNumber + 1
|
||||||
|
|
||||||
code = code + line
|
code = code + line
|
||||||
|
|
||||||
|
# Compute string list
|
||||||
|
stringList = ''
|
||||||
|
for match in sorted(matchStrs, key=matchStrs.get):
|
||||||
|
stringList += 'static const std::string matchStr' + str(matchStrs[match]) + '("' + match + '");\n'
|
||||||
|
|
||||||
fout = open(destname, 'wt')
|
fout = open(destname, 'wt')
|
||||||
fout.write(matchfunctions+code)
|
fout.write(header+stringList+matchfunctions+code)
|
||||||
fout.close()
|
fout.close()
|
||||||
|
|
||||||
# selftests..
|
# selftests..
|
||||||
|
|
Loading…
Reference in New Issue