Version 6.1.0.2, tag libreoffice-6.1.0.2
[LibreOffice.git] / compilerplugins / clang / constantparam.py
blob5a1ae85e1192ba7116444f532b6a8babf4204fb6
1 #!/usr/bin/python
3 import sys
4 import re
5 import io
# callInfo tuple (returnType, nameAndParams, paramName, paramType, sourceLocation)
# -> set of the callValues seen for that parameter
callDict = {}
# clang does not always use exactly the same numbers in the type-parameter vars it generates
# so I need to substitute them to ensure we can match correctly.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")


def normalizeTypeParams(line):
    """Return *line* with every ``type-parameter-<N>-<M>`` rewritten to ``type-parameter-?-?``.

    Text that contains no such token is returned unchanged.
    """
    return normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
# reading as binary (since we know it is pure ascii) is much faster than reading as unicode
with io.open("workdir/loplugin.constantparam.log", "rb", buffering=1024*1024) as txt:
    for line in txt:
        # each record is tab-separated:
        #   returnType, nameAndParams, sourceLocation, paramName, paramType, callValue
        tokens = line.strip().split("\t")
        try:
            returnType = normalizeTypeParams(tokens[0])
            nameAndParams = normalizeTypeParams(tokens[1])
            sourceLocation = tokens[2]
            paramName = tokens[3]
            paramType = normalizeTypeParams(tokens[4])
            callValue = tokens[5]
        except IndexError:
            # a record with too few fields: show the offending line, then abort
            print("problem with line " + line.strip())
            raise
        callInfo = (returnType, nameAndParams, paramName, paramType, sourceLocation)
        # collect every distinct value passed for this parameter
        if callInfo not in callDict:
            callDict[callInfo] = set()
        callDict[callInfo].add(callValue)
def RepresentsInt(s):
    """Return True if ``int(s)`` succeeds, False if it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    return True
# matches a call value that is a bare, argument-less call such as "Foo()"
# (raw string: "\w" and "\(" are not valid escapes in a normal string literal)
constructor_regex = re.compile(r"^\w+\(\)$")

# Each list holds (sourceLoc, functionSig, "paramType paramName", callValue) tuples:
tmp1list = list()  # value is always 0 or 1
tmp2list = list()  # value is always some other integer
tmp3list = list()  # value is always a no-arg call or the empty string literal
tmp4list = list()  # everything else
for callInfo, callValues in callDict.iteritems():
    returnType, nameAndParams, paramName, paramType, sourceLoc = callInfo
    # only parameters that receive exactly one distinct value are interesting
    if len(callValues) != 1:
        continue
    callValue = next(iter(callValues))
    if "unknown" in callValue:
        continue
    functionSig = returnType + " " + nameAndParams

    # try to ignore setter methods
    if ("," not in nameAndParams) and (("::set" in nameAndParams) or ("::Set" in nameAndParams)):
        continue
    # ignore code that follows a common pattern
    if sourceLoc.startswith(("sw/inc/swatrset.hxx", "sw/inc/format.hxx")): continue
    # template generated code
    if sourceLoc.startswith("include/sax/fshelper.hxx"): continue
    # debug code
    if sourceLoc.startswith("include/oox/dump"): continue
    # part of our binary API
    if sourceLoc.startswith("include/LibreOfficeKit"): continue

    # ignore methods generated by SFX macros
    if "RegisterInterface(class SfxModule *)" in nameAndParams: continue
    if "RegisterChildWindow(_Bool,class SfxModule *,enum SfxChildWindowFlags)" in nameAndParams: continue
    if "RegisterControl(unsigned short,class SfxModule *)" in nameAndParams: continue

    report = (sourceLoc, functionSig, paramType + " " + paramName, callValue)
    if RepresentsInt(callValue):
        if callValue == "0" or callValue == "1":
            tmp1list.append(report)
        else:
            tmp2list.append(report)
    # look for places where the callsite is always a constructor invocation
    elif constructor_regex.match(callValue) or callValue == "\"\"":
        if callValue.startswith(("Get", "get")): continue
        if "operator=" in functionSig: continue
        if "&&" in functionSig: continue
        if paramName == "###0" and callValue in ("InitData()", "InitAggregate()"): continue
        if callValue == "shared_from_this()": continue
        tmp3list.append(report)
    else:
        tmp4list.append(report)
# sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a sort key that orders the digit runs in *s* numerically.

    *s* is split on runs of ASCII digits; the key alternates lower-cased text
    pieces with integer values, so "file2" sorts before "file10".

    ``_nsre`` is an implementation detail (a pattern compiled once, with a
    single capturing group) and is not meant to be passed by callers.
    """
    # With one capturing group, split() returns text pieces at even indices and
    # the captured digit runs at odd indices.  Classifying by position avoids
    # str.isdigit(), which is also true for characters such as superscript
    # digits that int() cannot parse.
    return [int(piece) if index % 2 else piece.lower()
            for index, piece in enumerate(_nsre.split(s))]
# each result list goes to its own results file
_resultFiles = (
    (tmp1list, "compilerplugins/clang/constantparam.booleans.results"),
    (tmp2list, "compilerplugins/clang/constantparam.numbers.results"),
    (tmp3list, "compilerplugins/clang/constantparam.constructors.results"),
    (tmp4list, "compilerplugins/clang/constantparam.others.results"),
)

# order every list by source location (the first tuple element) before writing anything
for _results, _path in _resultFiles:
    _results.sort(key=lambda v: natural_sort_key(v[0]))

# print out the results
for _results, _path in _resultFiles:
    with open(_path, "wt") as f:
        for v in _results:
            # source location on its own line, then signature, parameter and value
            f.write(v[0] + "\n")
            f.write(" " + v[1] + "\n")
            f.write(" " + v[2] + "\n")
            f.write(" " + v[3] + "\n")
129 # -------------------------------------------------------------
130 # Now a fun set of heuristics to look for methods that
131 # take bitmask parameters where one or more of the bits in the
132 # bitmask is always one or always zero
# integer to hex str
def hex(i):
    """Return *i* formatted as lowercase hexadecimal with a ``0x`` prefix.

    NOTE: this shadows the builtin ``hex`` for the rest of the module; the
    name is kept because the code below calls it.
    """
    return "0x%x" % i
# I can't use python's ~ operator, because that produces negative numbers
def negate(i):
    """Return the 32-bit one's complement of *i*, i.e. ``0xFFFFFFFF - i``.

    The result is non-negative only for ``0 <= i <= 0xFFFFFFFF``.
    """
    return (1 << 32) - 1 - i
# (sourceLoc, functionSig, "paramType paramName [setBits=..] [clearBits=..]") tuples
tmp2list = list()
for callInfo, callValues in callDict.iteritems():
    returnType, nameAndParams, paramName, paramType, sourceLoc = callInfo
    # a single value is already handled by the constant-parameter pass
    if len(callValues) < 2:
        continue
    # we are only interested in enum parameters
    if "enum" not in paramType: continue
    # ... whose type name suggests a bitmask
    if not any(hint in paramType for hint in ("Flag", "flag", "Bit", "State")): continue
    # try to ignore setter methods
    if ("," not in nameAndParams) and (("::set" in nameAndParams) or ("::Set" in nameAndParams)):
        continue

    # skip the parameter entirely if any value is not a plain non-negative
    # integer (this also covers the "unknown" marker)
    if not all(callValue.isdigit() for callValue in callValues): continue

    # setBits: bits that are one in every value; clearBits: bits that are zero in every value
    values = [int(callValue) for callValue in callValues]
    setBits = values[0]
    clearBits = negate(values[0])
    for value in values[1:]:
        setBits &= value
        clearBits &= negate(value)

    # estimate allBits by using the highest bit we have seen
    # TODO dump more precise information about the allBits values of enums
    # NOTE(review): this actually uses the highest bit that is set in *every*
    # value (setBits), not the highest bit seen in any value - confirm intent.
    allBits = (1 << setBits.bit_length()) - 1
    clearBits = clearBits & allBits
    if setBits == 0 and clearBits == 0: continue

    functionSig = returnType + " " + nameAndParams

    v2 = paramType + " " + paramName
    if setBits != 0: v2 += " setBits=" + hex(setBits)
    if clearBits != 0: v2 += " clearBits=" + hex(clearBits)
    tmp2list.append((sourceLoc, functionSig, v2))
# sort results by filename:lineno
tmp2list.sort(key=lambda v: natural_sort_key(v[0]))

# print out the results
with open("compilerplugins/clang/constantparam.bitmask.results", "wt") as f:
    for v in tmp2list:
        # source location on its own line, then signature and bit summary
        f.write(v[0] + "\n")
        f.write(" " + v[1] + "\n")
        f.write(" " + v[2] + "\n")