Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / compilerplugins / clang / constantparam.py
blob 1371a6d9d7f3e6fd4bd652aa00ef26c329a4a109
1 #!/usr/bin/python3
3 import re
4 import io
# Maps a callInfo tuple (returnType, nameAndParams, paramName, paramType, sourceLocation)
# to the set of distinct call values logged for that parameter.
callDict = {}
# clang does not always use exactly the same numbers in the type-parameter vars it generates,
# so the numbers are replaced by a fixed placeholder to ensure we can match correctly.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")


def normalizeTypeParams(line):
    """Return *line* with every ``type-parameter-<n>-<m>`` token rewritten to ``type-parameter-?-?``."""
    return normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
# Load the plugin log into callDict. The file is opened in text mode; each line is expected
# to hold at least six tab-separated fields:
#   returnType, nameAndParams, sourceLocation, paramName, paramType, callValue
# A line with too few fields, or one that cannot be decoded, is reported by number and the
# original exception is re-raised.
with io.open("workdir/loplugin.constantparam.log", "r") as txt:
    # Incremented only after a line has been fully processed, so that on failure it names
    # the offending line (including failures raised while fetching the next line).
    line_no = 1
    try:
        for line in txt:
            tokens = line.strip().split("\t")
            returnType = normalizeTypeParams(tokens[0])
            nameAndParams = normalizeTypeParams(tokens[1])
            sourceLocation = tokens[2]
            paramName = tokens[3]
            paramType = normalizeTypeParams(tokens[4])
            callValue = tokens[5]
            callInfo = (returnType, nameAndParams, paramName, paramType, sourceLocation)
            # collect the distinct values seen for this parameter
            callDict.setdefault(callInfo, set()).add(callValue)
            line_no += 1
    except (IndexError, UnicodeDecodeError):
        print("problem with line " + str(line_no))
        raise
def RepresentsInt(s):
    """Return True if ``int(s)`` succeeds, False otherwise.

    Anything ``int()`` rejects yields False, whether it fails on the value
    (e.g. ``"abc"``) or on the type (e.g. ``None``).
    """
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    return True
# Matches a call value that consists solely of a no-argument call such as "Foo()".
constructor_regex = re.compile(r"^\w+\(\)$")

# Result buckets; each entry is (sourceLoc, functionSig, "paramType paramName", callValue).
tmp1list = list()  # the single value is 0 or 1
tmp2list = list()  # the single value is any other integer
tmp3list = list()  # the single value is a no-argument call or the empty string literal
tmp4list = list()  # everything else

for callInfo, callValues in callDict.items():
    nameAndParams = callInfo[1]
    # only parameters that were passed exactly one distinct value are of interest
    if len(callValues) != 1:
        continue
    callValue = next(iter(callValues))
    if "unknown" in callValue:
        continue
    sourceLoc = callInfo[4]
    functionSig = callInfo[0] + " " + callInfo[1]

    # try to ignore setter methods
    if "," not in nameAndParams and ("::set" in nameAndParams or "::Set" in nameAndParams):
        continue
    if sourceLoc.startswith((
        # code that follows a common pattern
        "sw/inc/swatrset.hxx",
        "sw/inc/format.hxx",
        # template generated code
        "include/sax/fshelper.hxx",
        # debug code
        "include/oox/dump",
        # part of our binary API
        "include/LibreOfficeKit",
    )):
        continue

    # ignore methods generated by SFX macros
    if "RegisterInterface(class SfxModule *)" in nameAndParams:
        continue
    if "RegisterChildWindow(_Bool,class SfxModule *,enum SfxChildWindowFlags)" in nameAndParams:
        continue
    if "RegisterControl(unsigned short,class SfxModule *)" in nameAndParams:
        continue

    entry = (sourceLoc, functionSig, callInfo[3] + " " + callInfo[2], callValue)
    if RepresentsInt(callValue):
        if callValue == "0" or callValue == "1":
            tmp1list.append(entry)
        else:
            tmp2list.append(entry)
    # look for places where the callsite is always a constructor invocation
    elif constructor_regex.match(callValue) or callValue == "\"\"":
        if callValue.startswith(("Get", "get")):
            continue
        if "operator=" in functionSig or "&&" in functionSig:
            continue
        if callInfo[2] == "###0" and callValue in ("InitData()", "InitAggregate()"):
            continue
        if callValue == "shared_from_this()":
            continue
        tmp3list.append(entry)
    else:
        tmp4list.append(entry)
# sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Split *s* into alternating text and number chunks for natural ordering.

    Runs of ASCII digits become ints (so "9" sorts before "10"); every other
    chunk is lower-cased text. The ASCII check keeps characters such as "²",
    which ``str.isdigit`` accepts but ``int`` rejects, as plain text.
    """
    return [int(text) if text.isascii() and text.isdigit() else text.lower()
            for text in re.split(_nsre, s)]


# sort by both the source-line and the datatype, so the output file ordering is stable
# when we have multiple items on the same source line
def v_sort_key(v):
    """Return the sort key for a result tuple: natural key of v[0], then v[1].

    The two parts are kept as separate tuple members rather than concatenated
    into one list. With concatenation, a location whose key is a strict prefix
    of another (e.g. "foo" vs "foo1") would compare the str in v[1] against an
    int chunk and raise TypeError.
    """
    return (natural_sort_key(v[0]), v[1])
# order every result bucket by source location, then by function signature
tmp1list.sort(key=v_sort_key)
tmp2list.sort(key=v_sort_key)
tmp3list.sort(key=v_sort_key)
tmp4list.sort(key=v_sort_key)
# print out the results
def _write_results(path, rows):
    """Write *rows* to *path*: first field on its own line, each further field on a line prefixed by one space."""
    # NOTE(review): whitespace in this copy of the file appears to have been collapsed;
    # confirm the width of the indent prefix against the upstream script.
    with open(path, "wt") as f:
        for row in rows:
            f.write(row[0] + "\n")
            for field in row[1:]:
                f.write(" " + field + "\n")


_write_results("compilerplugins/clang/constantparam.booleans.results", tmp1list)
_write_results("compilerplugins/clang/constantparam.numbers.results", tmp2list)
_write_results("compilerplugins/clang/constantparam.constructors.results", tmp3list)
_write_results("compilerplugins/clang/constantparam.others.results", tmp4list)
134 # -------------------------------------------------------------
135 # Now a fun set of heuristics to look for methods that
136 # take bitmask parameters where one or more of the bits in the
137 # bitmask is always one or always zero
# integer to hex str
# (shadows the builtin of the same name; the name is kept because code below calls it)
def hex(i):
    """Return *i* formatted as lower-case hexadecimal with a ``0x`` prefix."""
    return "0x%x" % i
# Python's bare ~ operator produces negative numbers, so the complement is masked
# to a fixed width instead.
def negate(i, bits=32):
    """Return the bitwise complement of *i* within the low *bits* bits.

    The result is always in ``range(0, 1 << bits)``. For ``0 <= i < 2**32`` and
    the default width this equals ``(1 << 32) - 1 - i``.
    """
    return ~i & ((1 << bits) - 1)
# Each entry is (sourceLoc, functionSig, "paramType paramName [setBits=0x..] [clearBits=0x..]").
tmp2list = list()
for callInfo, callValues in callDict.items():
    nameAndParams = callInfo[1]
    if len(callValues) < 2:
        continue
    # we are only interested in enum parameters whose type name suggests a bitmask
    if "enum" not in callInfo[3]:
        continue
    if not any(word in callInfo[3] for word in ("Flag", "flag", "Bit", "State")):
        continue
    # try to ignore setter methods
    if "," not in nameAndParams and ("::set" in nameAndParams or "::Set" in nameAndParams):
        continue
    # every observed value must be a plain decimal number ("unknown" fails this test too)
    if not all(value.isdigit() for value in callValues):
        continue

    setBits = -1  # all ones; ANDing in each value leaves the bits set at every call site
    seenBits = 0  # OR of every value: the bits set at one call site or more
    for callValue in callValues:
        setBits &= int(callValue)
        seenBits |= int(callValue)

    # estimate allBits by using the highest bit we have seen in any value
    # TODO dump more precise information about the allBits values of enums
    allBits = (1 << seenBits.bit_length()) - 1
    # bits inside the estimated mask that no call site ever sets
    clearBits = allBits & ~seenBits
    if setBits == 0 and clearBits == 0:
        continue

    sourceLoc = callInfo[4]
    functionSig = callInfo[0] + " " + callInfo[1]

    v2 = callInfo[3] + " " + callInfo[2]
    if setBits != 0:
        v2 += " setBits=" + hex(setBits)
    if clearBits != 0:
        v2 += " clearBits=" + hex(clearBits)
    tmp2list.append((sourceLoc, functionSig, v2))
# sort results by filename:lineno
tmp2list.sort(key=v_sort_key)

# print out the results: source location, then signature and parameter summary,
# each of the latter two on a line prefixed by one space
with open("compilerplugins/clang/constantparam.bitmask.results", "wt") as f:
    for v in tmp2list:
        f.write(v[0] + "\n")
        f.write(" " + v[1] + "\n")
        f.write(" " + v[2] + "\n")