LanguageTool: don't crash if REST protocol isn't set
[LibreOffice.git] / compilerplugins / clang / methodcycles.py
blob52b950b86f76eaef4c492851b5376a411e12e039
1 #!/usr/bin/python3
3 from collections import defaultdict
4 import io
5 import re
6 import subprocess
7 import sys
9 # --------------------------------------------------------------------------------------------
10 # globals
11 # --------------------------------------------------------------------------------------------
13 definitionSet = set() # set of method_name
14 definitionToSourceLocationMap = dict()
16 # for the "unused methods" analysis
17 callDict = defaultdict(set) # map of from_method_name -> set(method_name)
19 # clang does not always use exactly the same numbers in the type-parameter vars it generates
20 # so I need to substitute them to ensure we can match correctly.
21 normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
22 def normalizeTypeParams( line ):
23 line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
24 # make some of the types a little prettier
25 line = line.replace("std::__debug", "std::")
26 line = line.replace("class ", "")
27 line = line.replace("struct ", "")
28 line = line.replace("_Bool", "bool")
29 return line
31 # --------------------------------------------------------------------------------------------
32 # primary input loop
33 # --------------------------------------------------------------------------------------------
35 cnt = 0
36 with io.open("workdir/loplugin.methodcycles.log", "r", buffering=1024*1024) as txt:
37 for line in txt:
38 tokens = line.strip().split("\t")
39 if tokens[0] == "definition:":
40 returnType = tokens[1]
41 nameAndParams = tokens[2]
42 sourceLocation = tokens[3]
43 funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip()
44 definitionSet.add(funcInfo)
45 definitionToSourceLocationMap[funcInfo] = sourceLocation
46 elif tokens[0] == "call:":
47 returnTypeFrom = tokens[1]
48 nameAndParamsFrom = tokens[2]
49 returnTypeTo = tokens[3]
50 nameAndParamsTo = tokens[4]
51 caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip()
52 callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip()
53 callDict[caller].add(callee)
54 else:
55 print( "unknown line: " + line)
56 cnt = cnt + 1
57 #if cnt > 100000: break
59 # sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
60 def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
61 return [int(text) if text.isdigit() else text.lower()
62 for text in re.split(_nsre, s)]
63 # sort by both the source-line and the datatype, so the output file ordering is stable
64 # when we have multiple items on the same source line
65 def v_sort_key(v):
66 return natural_sort_key(v[1]) + [v[0]]
67 def sort_set_by_natural_key(s):
68 return sorted(s, key=lambda v: v_sort_key(v))
71 # --------------------------------------------------------------------------------------------
72 # analysis
73 # --------------------------------------------------------------------------------------------
75 # follow caller-callee chains, removing all methods reachable from a root method
76 def remove_reachable(callDict, startCaller):
77 worklist = list()
78 worklist.append(startCaller)
79 while len(worklist) > 0:
80 caller = worklist.pop()
81 if not caller in callDict:
82 continue
83 calleeSet = callDict[caller]
84 del callDict[caller]
85 if caller in definitionSet:
86 definitionSet.remove(caller)
87 for c in calleeSet:
88 worklist.append(c)
90 # look for all the external entry points and remove code called from there
91 to_be_removed = set()
92 to_be_removed.add("int main(int,char **)")
93 # random dynload entrypoints that we don't otherwise find
94 to_be_removed.add("bool TestImportOLE2(SvStream &)")
95 to_be_removed.add("void SbiRuntime::StepREDIMP()")
96 to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)");
97 for caller in definitionSet:
98 if not caller in definitionToSourceLocationMap:
99 to_be_removed.append(caller)
100 continue
101 location = definitionToSourceLocationMap[caller]
102 if "include/com/" in location \
103 or "include/cppu/" in location \
104 or "include/cppuhelper/" in location \
105 or "include/osl/" in location \
106 or "include/rtl/" in location \
107 or "include/sal/" in location \
108 or "include/salhelper/" in location \
109 or "include/typelib/" in location \
110 or "include/uno/" in location \
111 or "workdir/UnpackedTarball/" in location \
112 or "workdir/UnoApiHeadersTarget/" in location \
113 or "workdir/CustomTarget/officecfg/" in location \
114 or "workdir/LexTarget/" in location \
115 or "workdir/CustomTarget/i18npool/localedata/" in location \
116 or "workdir/SdiTarget/" in location \
117 or "/qa/" in location \
118 or "include/test/" in location:
119 to_be_removed.add(caller)
120 # TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called,
121 # which is not ideal
122 if "::~" in caller:
123 to_be_removed.add(caller)
124 # dyload entry points for VCL builder
125 if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller:
126 to_be_removed.add(caller)
127 if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller:
128 to_be_removed.add(caller)
129 # find all the UNO load-by-symbol-name entrypoints
130 uno_constructor_entrypoints = set()
131 git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True)
132 with git_grep_process.stdout as txt:
133 for line in txt:
134 idx1 = line.find(b"\"")
135 idx2 = line.find(b"\"", idx1 + 1)
136 func = line[idx1+1 : idx2]
137 uno_constructor_entrypoints.add(func.decode('utf-8'))
138 for caller in callDict:
139 if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller:
140 for func in uno_constructor_entrypoints:
141 if func in caller:
142 to_be_removed.add(caller)
143 # remove everything reachable from the found entry points
144 for caller in to_be_removed:
145 remove_reachable(callDict, caller)
146 for caller in callDict:
147 callDict[caller] -= to_be_removed
149 # create a reverse call graph
150 inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name)
151 for caller in callDict:
152 for callee in callDict[caller]:
153 inverseCallDict[callee].add(caller)
155 print_tree_recurse_set = set() # protect against cycles
156 def print_tree(f, callDict, caller, depth):
157 if depth == 0:
158 f.write("\n") # add an empty line before each tree
159 print_tree_recurse_set.clear()
160 # protect against cycles
161 if caller in print_tree_recurse_set:
162 return
163 # when printing out trees, things that are not in the map are things that are reachable,
164 # so we're not interested in them
165 if not caller in callDict:
166 return
167 print_tree_recurse_set.add(caller)
168 f.write(" " * depth + caller + "\n")
169 f.write(" " * depth + definitionToSourceLocationMap[caller] + "\n")
170 calleeSet = callDict[caller]
171 for c in calleeSet:
172 print_tree(f, callDict, c, depth+1)
174 # find possible roots (ie. entrypoints) by looking for methods that are not called
175 def dump_possible_roots():
176 possibleRootList = list()
177 for caller in callDict:
178 if not caller in inverseCallDict and caller in definitionToSourceLocationMap:
179 possibleRootList.append(caller)
180 possibleRootList.sort()
182 # print out first 100 trees of caller->callees
183 count = 0
184 with open("compilerplugins/clang/methodcycles.roots", "wt") as f:
185 f.write("callDict size " + str(len(callDict)) + "\n")
186 f.write("possibleRootList size " + str(len(possibleRootList)) + "\n")
187 f.write("\n")
188 for caller in possibleRootList:
189 f.write(caller + "\n")
190 f.write(" " + definitionToSourceLocationMap[caller] + "\n")
191 #print_tree(f, caller, 0)
192 count = count + 1
193 #if count>1000: break
195 # Look for cycles in a directed graph
196 # Adapted from:
197 # https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle
198 def print_cycles():
199 with open("compilerplugins/clang/methodcycles.results", "wt") as f:
200 path = set()
201 visited = set()
203 def printPath(path):
204 if len(path) < 2:
205 return
206 # we may have found a cycle, but if the cycle is called from outside the cycle
207 # the code is still in use.
208 for p in path:
209 for caller in inverseCallDict[p]:
210 if not caller in path:
211 return
212 f.write("found cycle\n")
213 for p in path:
214 f.write(" " + p + "\n")
215 f.write(" " + definitionToSourceLocationMap[p] + "\n")
216 f.write("\n")
218 def checkCyclic(vertex):
219 if vertex in visited:
220 return
221 visited.add(vertex)
222 path.add(vertex)
223 if vertex in callDict:
224 for neighbour in callDict[vertex]:
225 if neighbour in path:
226 printPath(path)
227 break
228 else:
229 checkCyclic(neighbour)
230 path.remove(vertex)
232 for caller in callDict:
233 checkCyclic(caller)
235 print_cycles()
237 # print partitioned sub-graphs
238 def print_partitions():
239 callDict2 = callDict
240 # Remove anything with no callees, and that is itself not called.
241 # After this stage, we should only be left with closed sub-graphs ie. partitions
242 while True:
243 to_be_removed.clear()
244 for caller in callDict2:
245 if len(callDict2[caller]) == 0 \
246 or not caller in inverseCallDict[caller]:
247 to_be_removed.add(caller)
248 if len(to_be_removed) == 0:
249 break
250 for caller in to_be_removed:
251 remove_reachable(callDict2, caller)
252 for caller in callDict2:
253 callDict2[caller] -= to_be_removed
255 count = 0
256 with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f:
257 f.write("callDict size " + str(len(callDict2)) + "\n")
258 f.write("\n")
259 while len(callDict2) > 0:
260 print_tree(f, callDict2, next(iter(callDict2)), 0)
261 for c in print_tree_recurse_set:
262 callDict2.pop(c, None)
263 count = count + 1
264 if count>1000: break
266 print_partitions()