bump product version to 6.4.0.3
[LibreOffice.git] / compilerplugins / clang / methodcycles.py
blob77c812d4ab9a9a7e26e713f740ca2059ab75a81d
1 #!/usr/bin/python
3 from collections import defaultdict
4 import io
5 import re
6 import subprocess
7 import sys
9 # --------------------------------------------------------------------------------------------
10 # globals
11 # --------------------------------------------------------------------------------------------
13 definitionSet = set() # set of method_name
14 definitionToSourceLocationMap = dict()
16 # for the "unused methods" analysis
17 callDict = defaultdict(set) # map of from_method_name -> set(method_name)
19 # clang does not always use exactly the same numbers in the type-parameter vars it generates
20 # so I need to substitute them to ensure we can match correctly.
21 normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
22 def normalizeTypeParams( line ):
23 line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
24 # make some of the types a little prettier
25 line = line.replace("std::__debug", "std::")
26 line = line.replace("class ", "")
27 line = line.replace("struct ", "")
28 line = line.replace("_Bool", "bool")
29 return line
31 # --------------------------------------------------------------------------------------------
32 # primary input loop
33 # --------------------------------------------------------------------------------------------
35 cnt = 0
36 with io.open("workdir/loplugin.methodcycles.log", "rb", buffering=1024*1024) as txt:
37 for line in txt:
38 tokens = line.strip().split("\t")
39 if tokens[0] == "definition:":
40 returnType = tokens[1]
41 nameAndParams = tokens[2]
42 sourceLocation = tokens[3]
43 funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip()
44 definitionSet.add(funcInfo)
45 definitionToSourceLocationMap[funcInfo] = sourceLocation
46 elif tokens[0] == "call:":
47 returnTypeFrom = tokens[1]
48 nameAndParamsFrom = tokens[2]
49 returnTypeTo = tokens[3]
50 nameAndParamsTo = tokens[4]
51 caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip()
52 callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip()
53 callDict[caller].add(callee)
54 else:
55 print( "unknown line: " + line)
56 cnt = cnt + 1
57 #if cnt > 100000: break
59 # sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
60 def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
61 return [int(text) if text.isdigit() else text.lower()
62 for text in re.split(_nsre, s)]
63 def sort_set_by_natural_key(s):
64 return sorted(s, key=lambda v: natural_sort_key(v[1]))
67 # --------------------------------------------------------------------------------------------
68 # analysis
69 # --------------------------------------------------------------------------------------------
71 # follow caller-callee chains, removing all methods reachable from a root method
72 def remove_reachable(callDict, startCaller):
73 worklist = list()
74 worklist.append(startCaller)
75 while len(worklist) > 0:
76 caller = worklist.pop()
77 if not caller in callDict:
78 continue
79 calleeSet = callDict[caller]
80 del callDict[caller]
81 if caller in definitionSet:
82 definitionSet.remove(caller)
83 for c in calleeSet:
84 worklist.append(c)
86 # look for all the external entry points and remove code called from there
87 to_be_removed = set()
88 to_be_removed.add("int main(int,char **)")
89 # random dynload entrypoints that we don't otherwise find
90 to_be_removed.add("bool TestImportOLE2(SvStream &)")
91 to_be_removed.add("void SbiRuntime::StepREDIMP()")
92 to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)");
93 for caller in definitionSet:
94 if not caller in definitionToSourceLocationMap:
95 to_be_removed.append(caller)
96 continue
97 location = definitionToSourceLocationMap[caller]
98 if "include/com/" in location \
99 or "include/cppu/" in location \
100 or "include/cppuhelper/" in location \
101 or "include/osl/" in location \
102 or "include/rtl/" in location \
103 or "include/sal/" in location \
104 or "include/salhelper/" in location \
105 or "include/systools/" in location \
106 or "include/typelib/" in location \
107 or "include/uno/" in location \
108 or "workdir/UnpackedTarball/" in location \
109 or "workdir/UnoApiHeadersTarget/" in location \
110 or "workdir/CustomTarget/officecfg/" in location \
111 or "workdir/LexTarget/" in location \
112 or "workdir/CustomTarget/i18npool/localedata/" in location \
113 or "workdir/SdiTarget/" in location \
114 or "/qa/" in location \
115 or "include/test/" in location:
116 to_be_removed.add(caller)
117 # TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called,
118 # which is not ideal
119 if "::~" in caller:
120 to_be_removed.add(caller)
121 # dyload entry points for VCL builder
122 if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller:
123 to_be_removed.add(caller)
124 if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller:
125 to_be_removed.add(caller)
126 # find all the UNO load-by-symbol-name entrypoints
127 uno_constructor_entrypoints = set()
128 git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True)
129 with git_grep_process.stdout as txt:
130 for line in txt:
131 idx1 = line.find("\"")
132 idx2 = line.find("\"", idx1 + 1)
133 func = line[idx1+1 : idx2]
134 uno_constructor_entrypoints.add(func)
135 for caller in callDict:
136 if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller:
137 for func in uno_constructor_entrypoints:
138 if func in caller:
139 to_be_removed.add(caller)
140 # remove everything reachable from the found entry points
141 for caller in to_be_removed:
142 remove_reachable(callDict, caller)
143 for caller in callDict:
144 callDict[caller] -= to_be_removed
146 # create a reverse call graph
147 inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name)
148 for caller in callDict:
149 for callee in callDict[caller]:
150 inverseCallDict[callee].add(caller)
152 print_tree_recurse_set = set() # protect against cycles
153 def print_tree(f, callDict, caller, depth):
154 if depth == 0:
155 f.write("\n") # add an empty line before each tree
156 print_tree_recurse_set.clear()
157 # protect against cycles
158 if caller in print_tree_recurse_set:
159 return
160 # when printing out trees, things that are not in the map are things that are reachable,
161 # so we're not interested in them
162 if not caller in callDict:
163 return
164 print_tree_recurse_set.add(caller)
165 f.write(" " * depth + caller + "\n")
166 f.write(" " * depth + definitionToSourceLocationMap[caller] + "\n")
167 calleeSet = callDict[caller]
168 for c in calleeSet:
169 print_tree(f, callDict, c, depth+1)
171 # find possible roots (ie. entrypoints) by looking for methods that are not called
172 def dump_possible_roots():
173 possibleRootList = list()
174 for caller in callDict:
175 if not caller in inverseCallDict and caller in definitionToSourceLocationMap:
176 possibleRootList.append(caller)
177 possibleRootList.sort()
179 # print out first 100 trees of caller->callees
180 count = 0
181 with open("compilerplugins/clang/methodcycles.roots", "wt") as f:
182 f.write("callDict size " + str(len(callDict)) + "\n")
183 f.write("possibleRootList size " + str(len(possibleRootList)) + "\n")
184 f.write("\n")
185 for caller in possibleRootList:
186 f.write(caller + "\n")
187 f.write(" " + definitionToSourceLocationMap[caller] + "\n")
188 #print_tree(f, caller, 0)
189 count = count + 1
190 #if count>1000: break
192 # Look for cycles in a directed graph
193 # Adapted from:
194 # https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle
195 def print_cycles():
196 with open("compilerplugins/clang/methodcycles.results", "wt") as f:
197 path = set()
198 visited = set()
200 def printPath(path):
201 if len(path) < 2:
202 return
203 # we may have found a cycle, but if the cycle is called from outside the cycle
204 # the code is still in use.
205 for p in path:
206 for caller in inverseCallDict[p]:
207 if not caller in path:
208 return
209 f.write("found cycle\n")
210 for p in path:
211 f.write(" " + p + "\n")
212 f.write(" " + definitionToSourceLocationMap[p] + "\n")
213 f.write("\n")
215 def checkCyclic(vertex):
216 if vertex in visited:
217 return
218 visited.add(vertex)
219 path.add(vertex)
220 if vertex in callDict:
221 for neighbour in callDict[vertex]:
222 if neighbour in path:
223 printPath(path)
224 break
225 else:
226 checkCyclic(neighbour)
227 path.remove(vertex)
229 for caller in callDict:
230 checkCyclic(caller)
232 print_cycles()
234 # print partitioned sub-graphs
235 def print_partitions():
236 callDict2 = callDict
237 # Remove anything with no callees, and that is itself not called.
238 # After this stage, we should only be left with closed sub-graphs ie. partitions
239 while True:
240 to_be_removed.clear()
241 for caller in callDict2:
242 if len(callDict2[caller]) == 0 \
243 or not caller in inverseCallDict[caller]:
244 to_be_removed.add(caller)
245 if len(to_be_removed) == 0:
246 break
247 for caller in to_be_removed:
248 remove_reachable(callDict2, caller)
249 for caller in callDict2:
250 callDict2[caller] -= to_be_removed
252 count = 0
253 with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f:
254 f.write("callDict size " + str(len(callDict2)) + "\n")
255 f.write("\n")
256 while len(callDict2) > 0:
257 print_tree(f, callDict2, next(iter(callDict2)), 0)
258 for c in print_tree_recurse_set:
259 callDict2.pop(c, None)
260 count = count + 1
261 if count>1000: break
263 print_partitions()