3 # Find exported symbols that can be made non-exported.
# Note that (a) parsing the output of these commands is a pain because it is quite irregular, and (b) I'm fumbling
# in the dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol; Linux linking is complicated.
9 # Takes about 5min to run on a decent machine.
# The standalone function analysis is reasonably reliable, but the class/method analysis is less so
12 # (something to do with destructor thunks not showing up in my results?)
14 # Also, the class/method analysis will not catch problems like
15 # 'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
16 # but loplugin:dyncastvisibility will do that for you
exported_symbols1 = set()
imported_symbols1 = set()
exported_symbols2 = set()  # decoded
imported_symbols2 = set()  # decoded

# Matches nm lines describing an exported text symbol, something like:
# 0000000000036ed0 T flash_component_getFactory
NM_EXPORT_LINE = re.compile(r'^[0-9a-fA-F]+ T ')


def find_paths(root, name_pattern):
    """Run ``find <root> -name <name_pattern>`` and return ``(exit_status, paths)``.

    The command is passed as an argument list, so no shell is involved: the
    glob reaches find verbatim instead of being expanded against the current
    directory first.  Paths are returned as stripped byte strings, exactly as
    find printed them.
    """
    found = subprocess.run(["find", root, "-name", name_pattern],
                           stdout=subprocess.PIPE, check=False)
    paths = [raw.strip() for raw in found.stdout.splitlines()]
    return found.returncode, [path for path in paths if path]


def tool_output_lines(argv):
    """Run *argv* without a shell and return its stdout as stripped text lines.

    The child is waited for (no unreaped processes are left behind) and a
    non-zero exit status is tolerated, because the tools are also pointed at
    files they may not understand.
    """
    finished = subprocess.run(argv, stdout=subprocess.PIPE, check=False)
    return [raw.strip().decode("utf-8") for raw in finished.stdout.splitlines()]


def parse_nm_exports(lines):
    """Return the names of the exported text ('T') symbols in ``nm -D`` output lines."""
    return {line.split(" ")[2].strip() for line in lines if NM_EXPORT_LINE.match(line)}


def parse_objdump_imports(lines):
    """Return the names of the undefined symbols in ``objdump -T`` output lines.

    We are looking for lines something like:
    0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
    The symbol is the last space-separated token.  objdump's header lines
    never contain "*UND*", so this filter discards them as well.
    """
    return {line.split(" ")[-1].strip() for line in lines if "*UND*" in line}


# find all our shared libs
find_status, sharedlibs = find_paths("./instdir", "*.so")
if find_status == 0:
    # Mirrors the former `find ... && find ...`: the unit-test libraries are
    # only searched when the search of ./instdir succeeded.
    sharedlibs += find_paths("./workdir/LinkTarget/CppunitTest", "*.so")[1]

for sharedlib in sharedlibs:
    # look for exported symbols
    exported_symbols1.update(parse_nm_exports(tool_output_lines(["nm", "-D", sharedlib])))
    # look for imported symbols
    imported_symbols1.update(parse_objdump_imports(tool_output_lines(["objdump", "-T", sharedlib])))
# look for imported symbols in executables
# The command is an argument list (no shell), so the "*.bin" glob reaches find
# verbatim and odd characters in a path cannot alter the nm command line.
subprocess_find = subprocess.run(["find", "./instdir", "-name", "*.bin"],
                                 stdout=subprocess.PIPE, check=False)
for line in subprocess_find.stdout.splitlines():
    executable = line.strip()
    if not executable:
        continue
    # look for undefined (i.e. imported) symbols
    subprocess_nm = subprocess.run(["nm", "-D", executable],
                                   stdout=subprocess.PIPE, check=False)
    # We are looking for lines something like:
    #                  U sal_detail_deinitialize
    # The filtering formerly delegated to `grep -w U` is done here, which also
    # guarantees that the symbol field exists before it is read.
    for line2_bytes in subprocess_nm.stdout.splitlines():
        fields = line2_bytes.decode("utf-8").split()
        if len(fields) >= 2 and fields[0] == "U":
            imported_symbols1.add(fields[1])
# Now we have to symbolize before comparing because sometimes (due to thunks) two
# different encoded names symbolize to the same method/func name
THUNK_PREFIXES = ("non-virtual thunk to ", "virtual thunk to ")
# Number of symbols handed to a single c++filt invocation; also the progress cadence.
DEMANGLE_BATCH_SIZE = 128


def strip_thunk_prefix(name):
    """Return *name* without a leading "[non-]virtual thunk to " marker, if it has one."""
    for prefix in THUNK_PREFIXES:
        if name.startswith(prefix):
            return name[len(prefix):]
    return name


def demangle_into(symbols, decoded, done, total):
    """Demangle *symbols* with c++filt and add the results to the set *decoded*.

    Symbols are passed to c++filt as command-line arguments in batches; it
    prints one demangled name per argument, one per line.  This keeps the
    per-argument demangling semantics while starting far fewer processes than
    one call per symbol.

    *done* is the number of symbols already processed by earlier calls and
    *total* the overall number of symbols; both are only used for the
    percentage printed before each batch.  Returns the updated *done* count.
    Raises subprocess.CalledProcessError if c++filt exits with an error.
    """
    pending = list(symbols)
    for start in range(0, len(pending), DEMANGLE_BATCH_SIZE):
        print(str(int((done + start) * 100 / total)) + "%")
        batch = pending[start:start + DEMANGLE_BATCH_SIZE]
        output = subprocess.check_output(["c++filt"] + batch)
        for raw_name in output.splitlines():
            decoded.add(strip_thunk_prefix(raw_name.strip().decode("utf-8")))
    return done + len(pending)


progress_max_len = len(imported_symbols1) + len(exported_symbols1)
progress = demangle_into(imported_symbols1, imported_symbols2, 0, progress_max_len)
progress = demangle_into(exported_symbols1, exported_symbols2, progress, progress_max_len)
# Anything that is exported but never imported is a candidate for being hidden.
unused_exports = exported_symbols2.difference(imported_symbols2)

# Summary of the three set sizes, one "<name> = <count>" line each.
print("exported =", len(exported_symbols2))
print("imported =", len(imported_symbols2))
print("unused_exports =", len(unused_exports))
# Class-level tally, written to bin/find-can-be-private-symbols.classes.results.
# A symbol from exported_symbols2 that reaches the counting step adds 1 to the entry
# stored under `clz`; a symbol from imported_symbols2 subtracts 1.  The (count, class)
# pairs are then sorted in descending order and written as "<count> <class>" lines.
# NOTE(review): the statements that derive `clz` from `sym` are not visible in this
# listing (the embedded numbering jumps 116 -> 121 and 125 -> 130) -- confirm against
# the full file before relying on this description.
114 # for each class, count how many symbols will become hidden if we mark the class as hidden
115 can_be_hidden_count
= dict()
116 for sym
in exported_symbols2
:
# Exported member: bump an existing counter, otherwise start it at 1.
121 if clz
in can_be_hidden_count
:
122 can_be_hidden_count
[clz
] = can_be_hidden_count
[clz
] + 1
124 can_be_hidden_count
[clz
] = 1
125 for sym
in imported_symbols2
:
# Imported member: decrement an existing counter, otherwise start it at -1.
130 if clz
in can_be_hidden_count
:
131 can_be_hidden_count
[clz
] = can_be_hidden_count
[clz
] - 1
133 can_be_hidden_count
[clz
] = -1
134 # convert to list, and sort the results in descending order
135 can_be_hidden_list
= list()
136 for clz
in can_be_hidden_count
:
137 cnt
= can_be_hidden_count
[clz
]
# NOTE(review): a statement between 137 and 139 is not visible here (possibly a
# filter on cnt) -- verify which entries actually get appended.
139 can_be_hidden_list
.append((cnt
, clz
))
# Tuples compare by count first, so the reverse sort puts the largest counts first.
140 can_be_hidden_list
.sort(reverse
=True)
141 with
open("bin/find-can-be-private-symbols.classes.results", "wt") as f
:
142 for i
in can_be_hidden_list
:
# NOTE(review): statements between 142 and 145 are not visible here -- verify
# whether some entries are skipped before being written.
# i is a (count, class name) tuple.
145 f
.write(str(i
[0]) + " " + i
[1] + "\n")
148 with
open("bin/find-can-be-private-symbols.functions.results", "wt") as f
:
149 for sym
in sorted(unused_exports
):
150 # Filter out most of the noise.
151 # No idea where these are coming from, but not our code.
152 if sym
.startswith("CERT_"):
154 elif sym
.startswith("DER_"):
156 elif sym
.startswith("FORM_"):
158 elif sym
.startswith("FPDF"):
160 elif sym
.startswith("HASH_"):
162 elif sym
.startswith("Hunspell_"):
164 elif sym
.startswith("LL_"):
166 elif sym
.startswith("LP_"):
168 elif sym
.startswith("LU"):
170 elif sym
.startswith("MIP"):
172 elif sym
.startswith("MPS"):
174 elif sym
.startswith("NSS"):
176 elif sym
.startswith("NSC_"):
178 elif sym
.startswith("PK11"):
180 elif sym
.startswith("PL_"):
182 elif sym
.startswith("PQ"):
184 elif sym
.startswith("PBE_"):
186 elif sym
.startswith("PORT_"):
188 elif sym
.startswith("PRP_"):
190 elif sym
.startswith("PR_"):
192 elif sym
.startswith("PT_"):
194 elif sym
.startswith("QS_"):
196 elif sym
.startswith("REPORT_"):
198 elif sym
.startswith("RSA_"):
200 elif sym
.startswith("SEC"):
202 elif sym
.startswith("SGN"):
204 elif sym
.startswith("SOS"):
206 elif sym
.startswith("SSL_"):
208 elif sym
.startswith("VFY_"):
210 elif sym
.startswith("_PR_"):
212 elif sym
.startswith("ber_"):
214 elif sym
.startswith("bfp_"):
216 elif sym
.startswith("ldap_"):
218 elif sym
.startswith("ne_"):
220 elif sym
.startswith("opj_"):
222 elif sym
.startswith("pg_"):
224 elif sym
.startswith("pq"):
226 elif sym
.startswith("presolve_"):
228 elif sym
.startswith("sqlite3_"):
230 elif sym
.startswith("libepubgen::"):
232 elif sym
.startswith("lucene::"):
234 elif sym
.startswith("Hunspell::"):
236 elif sym
.startswith("sk_"):
238 elif sym
.startswith("_Z"):
241 elif sym
.endswith("get_implementation"):
243 elif sym
.endswith("component_getFactory"):
245 elif sym
== "CreateUnoWrapper":
247 elif sym
== "ExportDOC":
249 elif sym
== "ExportRTF":
251 elif sym
== "GetSaveWarningOfMSVBAStorage_ww8":
253 elif sym
== "GetSpecialCharsForEdit":
255 elif sym
.startswith("Import"):
257 elif sym
.startswith("Java_com_sun_star_"):
259 elif sym
.startswith("TestImport"):
261 elif sym
.startswith("getAllCalendars_"):
263 elif sym
.startswith("getAllCurrencies_"):
265 elif sym
.startswith("getAllFormats"):
267 elif sym
.startswith("getBreakIteratorRules_"):
269 elif sym
.startswith("getCollationOptions_"):
271 elif sym
.startswith("getCollatorImplementation_"):
273 elif sym
.startswith("getContinuousNumberingLevels_"):
275 elif sym
.startswith("getDateAcceptancePatterns_"):
277 elif sym
.startswith("getForbiddenCharacters_"):
279 elif sym
.startswith("getIndexAlgorithm_"):
281 elif sym
.startswith("getLCInfo_"):
283 elif sym
.startswith("getLocaleItem_"):
285 elif sym
.startswith("getOutlineNumberingLevels_"):
287 elif sym
.startswith("getReservedWords_"):
289 elif sym
.startswith("getSTC_"):
291 elif sym
.startswith("getSearchOptions_"):
293 elif sym
.startswith("getTransliterations_"):
295 elif sym
.startswith("getUnicodeScripts_"):
297 elif sym
.startswith("lok_"):
300 elif sym
.startswith("osl_"):
302 elif sym
.startswith("rtl_"):
304 elif sym
.startswith("typelib_"):
306 elif sym
.startswith("typereg_"):
308 elif sym
.startswith("uno_"):
310 # remove things we found that do not exist in our source code, they're not ours
311 #if not(extractFunctionNameFromSignature(sym) in all_source_names): continue