Fix typos
[LibreOffice.git] / bin / find-can-be-private-symbols.py
blobe3c08b9b40c75ee1269f3a0a9cefd9c0bd49f864
1 #!/usr/bin/python3
3 # Find exported symbols that can be made non-exported.
5 # Noting that (a) parsing these commands is a pain, the output is quite irregular and (b) I'm fumbling in the
6 # dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol, linux linking
7 # is rather complex.
9 # Takes about 5min to run on a decent machine.
11 # The standalone function analysis is reasonably reliable, but the class/method analysis is less so
12 # (something to do with destructor thunks not showing up in my results?)
14 # Also, the class/method analysis will not catch problems like
15 # 'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
16 # but loplugin:dyncastvisibility will do that for you
19 import subprocess
20 import re
# Raw (still mangled) symbol names, exactly as reported by nm/objdump.
exported_symbols1, imported_symbols1 = set(), set()
# The same symbols after decoding (demangling).
exported_symbols2, imported_symbols2 = set(), set()
# Find all our shared libs and record the symbols each one exports and imports.
#
# Every command is started from an argument list rather than a shell string, so the
# "*.so" pattern always reaches find unexpanded and library paths containing spaces or
# shell metacharacters are passed through intact. Each Popen is used as a context
# manager so its pipe is closed and the child is waited for (no zombie processes).

# Exported symbols are the nm lines that look something like:
# 0000000000036ed0 T flash_component_getFactory
line_regex = re.compile(r'^[0-9a-fA-F]+ T ')

for search_dir in ("./instdir", "./workdir/LinkTarget/CppunitTest"):
    with subprocess.Popen(["find", search_dir, "-name", "*.so"],
                          stdout=subprocess.PIPE) as subprocess_find:
        for line in subprocess_find.stdout:
            # find's output is read as bytes; the path is handed on to nm/objdump unchanged.
            sharedlib = line.strip()
            if not sharedlib:
                continue

            # look for exported symbols
            with subprocess.Popen([b"nm", b"-D", sharedlib],
                                  stdout=subprocess.PIPE) as subprocess_nm:
                for line2_bytes in subprocess_nm.stdout:
                    line2 = line2_bytes.strip().decode("utf-8")
                    if line_regex.match(line2):
                        # fields are: address, type letter, symbol name
                        exported_symbols1.add(line2.split(" ")[2].strip())

            # look for imported symbols
            with subprocess.Popen([b"objdump", b"-T", sharedlib],
                                  stdout=subprocess.PIPE) as subprocess_objdump:
                txt2 = subprocess_objdump.stdout
                # ignore the four header lines objdump prints first
                for _ in range(4):
                    txt2.readline()
                # We are looking for lines something like:
                # 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
                for line2_bytes in txt2:
                    line2 = line2_bytes.strip().decode("utf-8")
                    if "*UND*" not in line2:
                        continue
                    # the symbol name is the last space-separated field
                    imported_symbols1.add(line2.split(" ")[-1].strip())

    # The two searches used to be chained with "&&": the second directory is only
    # searched when the first find succeeded.
    if subprocess_find.returncode != 0:
        break
# look for imported symbols in executables
#
# Commands are started from argument lists rather than shell strings, so the "*.bin"
# pattern reaches find unexpanded and executable paths containing spaces or shell
# metacharacters are passed through intact. Each Popen is used as a context manager so
# its pipe is closed and the child is waited for.
with subprocess.Popen(["find", "./instdir", "-name", "*.bin"],
                      stdout=subprocess.PIPE) as subprocess_find:
    for line in subprocess_find.stdout:
        # find's output is read as bytes; the path is handed on to nm unchanged.
        executable = line.strip()
        if not executable:
            continue
        # Undefined (i.e. imported) symbols are the nm lines that look something like:
        # U sal_detail_deinitialize
        with subprocess.Popen([b"nm", b"-D", executable],
                              stdout=subprocess.PIPE) as subprocess_nm:
            for line2_bytes in subprocess_nm.stdout:
                tokens = line2_bytes.decode("utf-8").split()
                # Filtering here replaces the former "| grep -w U" and only accepts
                # lines whose type column really is "U".
                if len(tokens) == 2 and tokens[0] == "U":
                    imported_symbols1.add(tokens[1])
# Now we have to symbolize before comparing because sometimes (due to thunks) two
# different encoded names symbolize to the same method/func name


def _demangle(mangled):
    """Return the c++filt output for one symbol with any thunk prefix removed.

    "non-virtual thunk to X" and "virtual thunk to X" are both reduced to "X" so that a
    thunk and the function it forwards to compare equal.
    """
    demangled = subprocess.check_output(["c++filt", mangled]).strip().decode("utf-8")
    for thunk_prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if demangled.startswith(thunk_prefix):
            return demangled[len(thunk_prefix):]
    return demangled


# progress counts across both sets, matching progress_max_len, so the printed
# percentage rises steadily to 100 instead of restarting for the exported symbols.
progress = 0
progress_max_len = len(imported_symbols1) + len(exported_symbols1)
for mangled_symbols, demangled_symbols in ((imported_symbols1, imported_symbols2),
                                           (exported_symbols1, exported_symbols2)):
    for sym in mangled_symbols:
        progress += 1
        if progress % 128 == 0:
            print(str(int(progress * 100 / progress_max_len)) + "%")
        demangled_symbols.add(_demangle(sym))
# Everything that is exported by some library but imported by nothing we scanned.
unused_exports = exported_symbols2.difference(imported_symbols2)
print("exported =", len(exported_symbols2))
print("imported =", len(imported_symbols2))
print("unused_exports =", len(unused_exports))
# for each class, count how many symbols will become hidden if we mark the class as hidden


def _enclosing_scope(signature):
    """Return the class/namespace part of a demangled symbol, or None if it has none.

    Only the text before the first "(" is searched for "::", so a namespaced parameter
    type (e.g. "Foo::bar(rtl::OUString const&)") cannot be mistaken for the scope of the
    function itself.
    """
    params_start = signature.find("(")
    if params_start == -1:
        # no parameter list: the whole string is the qualified name
        params_start = len(signature)
    scope_end = signature.rfind("::", 0, params_start)
    if scope_end == -1:
        return None
    return signature[:scope_end]


# +1 for every exported symbol of a class, -1 for every imported one
can_be_hidden_count = dict()
for sym in exported_symbols2:
    clz = _enclosing_scope(sym)
    if clz is not None:
        can_be_hidden_count[clz] = can_be_hidden_count.get(clz, 0) + 1
for sym in imported_symbols2:
    clz = _enclosing_scope(sym)
    if clz is not None:
        can_be_hidden_count[clz] = can_be_hidden_count.get(clz, 0) - 1

# convert to list, and sort the results in descending order
can_be_hidden_list = [(cnt, clz) for clz, cnt in can_be_hidden_count.items() if cnt > 0]
can_be_hidden_list.sort(reverse=True)
with open("bin/find-can-be-private-symbols.classes.results", "wt") as f:
    for cnt, clz in can_be_hidden_list:
        # the list is sorted, so everything from here on has fewer than 10 symbols
        if cnt < 10:
            break
        f.write(str(cnt) + " " + clz + "\n")
# Filter out most of the noise.
# No idea where these are coming from, but not our code.
not_our_code_prefixes = (
    "CERT_", "DER_", "FORM_", "FPDF", "HASH_", "Hunspell_", "LL_", "LP_", "LU", "MIP",
    "MPS", "NSS", "NSC_", "PK11", "PL_", "PQ", "PBE_", "PORT_", "PRP_", "PR_", "PT_",
    "QS_", "REPORT_", "RSA_", "SEC", "SGN", "SOS", "SSL_", "VFY_", "_PR_", "ber_",
    "bfp_", "ldap_", "ne_", "opj_", "pg_", "pq", "presolve_", "sqlite3_",
    "libepubgen::", "lucene::", "Hunspell::", "sk_", "_Z",
)
# dynamically loaded
dynamically_loaded_suffixes = ("get_implementation", "component_getFactory")
dynamically_loaded_names = (
    "CreateUnoWrapper",
    "ExportDOC",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
)
dynamically_loaded_prefixes = (
    "Import", "Java_com_sun_star_", "TestImport", "getAllCalendars_",
    "getAllCurrencies_", "getAllFormats", "getBreakIteratorRules_",
    "getCollationOptions_", "getCollatorImplementation_",
    "getContinuousNumberingLevels_", "getDateAcceptancePatterns_",
    "getForbiddenCharacters_", "getIndexAlgorithm_", "getLCInfo_", "getLocaleItem_",
    "getOutlineNumberingLevels_", "getReservedWords_", "getSTC_", "getSearchOptions_",
    "getTransliterations_", "getUnicodeScripts_", "lok_",
)
# UDK API
udk_api_prefixes = ("osl_", "rtl_", "typelib_", "typereg_", "uno_")

with open("bin/find-can-be-private-symbols.functions.results", "wt") as f:
    for sym in sorted(unused_exports):
        if sym.startswith(not_our_code_prefixes):
            continue
        if sym.endswith(dynamically_loaded_suffixes):
            continue
        if sym in dynamically_loaded_names:
            continue
        if sym.startswith(dynamically_loaded_prefixes):
            continue
        if sym.startswith(udk_api_prefixes):
            continue
        # remove things we found that do not exist in our source code, they're not ours
        # (currently disabled)
        #if not(extractFunctionNameFromSignature(sym) in all_source_names): continue
        f.write(sym + "\n")