3 # Find exported symbols that can be made non-exported.
5 # Noting that (a) parsing these commands is a pain, the output is quite irregular and (b) I'm fumbling in the
6 # dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol, linux linking
9 # Takes about 5min to run on a decent machine.
11 # The standalone function analysis is reasonably reliable, but the class/method analysis is less so
12 # (something to do with destructor thunks not showing up in my results?)
14 # Also, the class/method analysis will not catch problems like
15 # 'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
16 # but loplugin:dyncastvisibility will do that for you
exported_symbols1 = set()
imported_symbols1 = set()
exported_symbols2 = set() # decoded
imported_symbols2 = set() # decoded

# `nm -D` prints a symbol defined in the text section as a line like:
# 0000000000036ed0 T flash_component_getFactory
# Compiled once here instead of once per scanned library.
NM_TEXT_SYMBOL_REGEX = re.compile(r'^[0-9a-fA-F]+ T ')


def read_tool_lines(argv):
    """Run *argv* without a shell and return its stdout as stripped text lines.

    The command is given as an argument list, so file names and glob
    patterns reach the tool verbatim (no shell expansion or quoting issues).
    The child is waited for by subprocess.run, so no process is left behind.
    The exit status is deliberately not checked: a tool that fails on one
    file simply contributes no lines.

    Raises UnicodeDecodeError if the output is not valid UTF-8.
    """
    completed = subprocess.run(argv, stdout=subprocess.PIPE)
    stripped = (raw.strip() for raw in completed.stdout.splitlines())
    return [raw.decode("utf-8") for raw in stripped if raw]


def find_files(roots, pattern):
    """Return the paths below *roots* whose file name matches glob *pattern*.

    Paths are returned as bytes so that names which are not valid UTF-8
    can still be handed back to nm/objdump unchanged.
    """
    argv = ["find"] + list(roots) + ["-name", pattern]
    completed = subprocess.run(argv, stdout=subprocess.PIPE)
    return [path for path in completed.stdout.splitlines() if path]


def nm_exported_symbols(lines):
    """Return the text-section ('T') symbols found in `nm -D` output lines."""
    return [line.split(" ")[2].strip()
            for line in lines
            if NM_TEXT_SYMBOL_REGEX.match(line)]


def objdump_undefined_symbols(lines):
    """Return the undefined symbols found in `objdump -T` output lines.

    We are looking for lines something like:
    0000000000000000      DF *UND*  0000000000000000     _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
    The header lines objdump prints first never contain the *UND* marker,
    so they are rejected by the same test and need no separate skipping.
    """
    return [line.strip().split(" ")[-1]
            for line in lines
            if "*UND*" in line]


def nm_undefined_symbols(lines):
    """Return the undefined ('U') symbols found in `nm -D` output lines.

    We are looking for lines something like:
                     U sal_detail_deinitialize
    Only lines whose first field is exactly "U" count, so a defined symbol
    that merely happens to be named "U" is not mistaken for an import.
    """
    symbols = []
    for line in lines:
        fields = line.split()
        if len(fields) >= 2 and fields[0] == "U":
            symbols.append(fields[1])
    return symbols


# find all our shared libs
# (find accepts several starting points; results for ./instdir come first)
for sharedlib in find_files(["./instdir", "./workdir/LinkTarget/CppunitTest"], "*.so"):
    # look for exported symbols
    exported_symbols1.update(
        nm_exported_symbols(read_tool_lines(["nm", "-D", sharedlib])))
    # look for imported symbols
    imported_symbols1.update(
        objdump_undefined_symbols(read_tool_lines(["objdump", "-T", sharedlib])))

# look for imported symbols in executables
for executable in find_files(["./instdir"], "*.bin"):
    imported_symbols1.update(
        nm_undefined_symbols(read_tool_lines(["nm", "-D", executable])))
# Now we have to symbolize before comparing because sometimes (due to thunks) two
# different encoded names symbolize to the same method/func name

# Number of symbols handed to a single c++filt process; also the interval
# (in symbols) at which a progress percentage is printed.
DEMANGLE_BATCH_SIZE = 128

# c++filt prefixes thunks with one of these; the name behind the prefix is
# what has to be compared, so the prefix is dropped.
THUNK_PREFIXES = ("non-virtual thunk to ", "virtual thunk to ")


def strip_thunk_prefix(name):
    """Return *name* without a leading thunk marker (at most one is removed)."""
    for prefix in THUNK_PREFIXES:
        if name.startswith(prefix):
            return name[len(prefix):]
    return name


def demangle(mangled_names):
    """Demangle a batch of symbols with one c++filt invocation.

    c++filt prints one line per symbol given on its command line, so a whole
    batch costs a single process instead of one process per symbol.

    Returns the demangled names with thunk prefixes removed.
    Raises subprocess.CalledProcessError if c++filt exits with an error.
    """
    output = subprocess.check_output(["c++filt"] + list(mangled_names))
    return [strip_thunk_prefix(line.strip())
            for line in output.decode("utf-8").splitlines()]


def demangle_all(mangled_names, demangled_names, done, total):
    """Demangle every name in *mangled_names* into the set *demangled_names*.

    *done* is the number of symbols already processed and *total* the overall
    number to process; a percentage is printed each time the running count
    reaches a multiple of DEMANGLE_BATCH_SIZE.  Returns the updated count.
    """
    batch = []
    for name in mangled_names:
        batch.append(name)
        done += 1
        if done % DEMANGLE_BATCH_SIZE == 0:
            print(str(int(done * 100 / total)) + "%")
            demangled_names.update(demangle(batch))
            batch = []
    if batch:
        # leftover symbols that did not fill a whole batch
        demangled_names.update(demangle(batch))
    return done


progress_max_len = len(imported_symbols1) + len(exported_symbols1)
# The counter runs on across both sets so the percentage is relative to
# progress_max_len, which covers both.
progress = demangle_all(imported_symbols1, imported_symbols2, 0, progress_max_len)
progress = demangle_all(exported_symbols1, exported_symbols2, progress, progress_max_len)
102 unused_exports
= exported_symbols2
- imported_symbols2
103 print("exported = " + str(len(exported_symbols2
)))
104 print("imported = " + str(len(imported_symbols2
)))
105 print("unused_exports = " + str(len(unused_exports
)))
107 # for each class, count how many symbols will become hidden if we mark the class as hidden
# The visible statements keep one integer per key `clz` in can_be_hidden_count:
# the first loop (over exported_symbols2) adds 1 or starts the key at 1, the
# second loop (over imported_symbols2) subtracts 1 or starts the key at -1.
108 can_be_hidden_count
= dict()
109 for sym
in exported_symbols2
:
# NOTE(review): the statements that bind `clz` for the current `sym` are not
# visible in this excerpt — confirm how the class key is derived.
113 if clz
in can_be_hidden_count
:
114 can_be_hidden_count
[clz
] = can_be_hidden_count
[clz
] + 1
# Key not seen before; presumably the alternative branch of the membership
# test above (the branch keyword itself is not visible here) — TODO confirm.
116 can_be_hidden_count
[clz
] = 1
117 for sym
in imported_symbols2
:
# NOTE(review): as in the loop above, the binding of `clz` is not visible here.
121 if clz
in can_be_hidden_count
:
122 can_be_hidden_count
[clz
] = can_be_hidden_count
[clz
] - 1
# Key not seen before; presumably the alternative branch of the membership
# test above — TODO confirm.
124 can_be_hidden_count
[clz
] = -1
125 # convert to list, and sort the results in descending order
126 can_be_hidden_list
= list()
127 for clz
in can_be_hidden_count
:
128 cnt
= can_be_hidden_count
[clz
]
# NOTE(review): a statement between reading `cnt` and the append may be
# missing from this excerpt (it could filter entries) — confirm.
# Entries are stored as (count, key) tuples so that sorting orders by count first.
130 can_be_hidden_list
.append((cnt
, clz
))
# Tuples compare element-wise, so equal counts are ordered by key, also descending.
131 can_be_hidden_list
.sort(reverse
=True)
# Write the entries of can_be_hidden_list to the classes results file, one
# "<count> <key>" line per entry, in the list's existing order.
132 with
open("bin/find-can-be-private-symbols.classes.results", "wt") as f
:
133 for i
in can_be_hidden_list
:
# NOTE(review): a statement between the loop header and the write is not
# visible in this excerpt; it may skip some entries — confirm.
# i is a (count, key) tuple: i[0] is the count, i[1] the key.
135 f
.write(str(i
[0]) + " " + i
[1] + "\n")
138 with
open("bin/find-can-be-private-symbols.functions.results", "wt") as f
:
139 for sym
in sorted(unused_exports
):
140 # Filter out most of the noise.
141 # No idea where these are coming from, but not our code.
142 if sym
.startswith("CERT_"): continue
143 elif sym
.startswith("DER_"): continue
144 elif sym
.startswith("FORM_"): continue
145 elif sym
.startswith("FPDF"): continue
146 elif sym
.startswith("HASH_"): continue
147 elif sym
.startswith("Hunspell_"): continue
148 elif sym
.startswith("LL_"): continue
149 elif sym
.startswith("LP_"): continue
150 elif sym
.startswith("LU"): continue
151 elif sym
.startswith("MIP"): continue
152 elif sym
.startswith("MPS"): continue
153 elif sym
.startswith("NSS"): continue
154 elif sym
.startswith("NSC_"): continue
155 elif sym
.startswith("PK11"): continue
156 elif sym
.startswith("PL_"): continue
157 elif sym
.startswith("PQ"): continue
158 elif sym
.startswith("PBE_"): continue
159 elif sym
.startswith("PORT_"): continue
160 elif sym
.startswith("PRP_"): continue
161 elif sym
.startswith("PR_"): continue
162 elif sym
.startswith("PT_"): continue
163 elif sym
.startswith("QS_"): continue
164 elif sym
.startswith("REPORT_"): continue
165 elif sym
.startswith("RSA_"): continue
166 elif sym
.startswith("SEC"): continue
167 elif sym
.startswith("SGN"): continue
168 elif sym
.startswith("SOS"): continue
169 elif sym
.startswith("SSL_"): continue
170 elif sym
.startswith("VFY_"): continue
171 elif sym
.startswith("_PR_"): continue
172 elif sym
.startswith("ber_"): continue
173 elif sym
.startswith("bfp_"): continue
174 elif sym
.startswith("ldap_"): continue
175 elif sym
.startswith("ne_"): continue
176 elif sym
.startswith("opj_"): continue
177 elif sym
.startswith("pg_"): continue
178 elif sym
.startswith("pq"): continue
179 elif sym
.startswith("presolve_"): continue
180 elif sym
.startswith("sqlite3_"): continue
181 elif sym
.startswith("libepubgen::"): continue
182 elif sym
.startswith("lucene::"): continue
183 elif sym
.startswith("Hunspell::"): continue
184 elif sym
.startswith("sk_"): continue
185 elif sym
.startswith("_Z"): continue
187 elif sym
.endswith("get_implementation"): continue
188 elif sym
.endswith("component_getFactory"): continue
189 elif sym
== "CreateUnoWrapper": continue
190 elif sym
== "ExportDOC": continue
191 elif sym
== "ExportRTF": continue
192 elif sym
== "GetSaveWarningOfMSVBAStorage_ww8": continue
193 elif sym
== "GetSpecialCharsForEdit": continue
194 elif sym
.startswith("Import"): continue
195 elif sym
.startswith("Java_com_sun_star_"): continue
196 elif sym
.startswith("TestImport"): continue
197 elif sym
.startswith("getAllCalendars_"): continue
198 elif sym
.startswith("getAllCurrencies_"): continue
199 elif sym
.startswith("getAllFormats"): continue
200 elif sym
.startswith("getBreakIteratorRules_"): continue
201 elif sym
.startswith("getCollationOptions_"): continue
202 elif sym
.startswith("getCollatorImplementation_"): continue
203 elif sym
.startswith("getContinuousNumberingLevels_"): continue
204 elif sym
.startswith("getDateAcceptancePatterns_"): continue
205 elif sym
.startswith("getForbiddenCharacters_"): continue
206 elif sym
.startswith("getIndexAlgorithm_"): continue
207 elif sym
.startswith("getLCInfo_"): continue
208 elif sym
.startswith("getLocaleItem_"): continue
209 elif sym
.startswith("getOutlineNumberingLevels_"): continue
210 elif sym
.startswith("getReservedWords_"): continue
211 elif sym
.startswith("getSTC_"): continue
212 elif sym
.startswith("getSearchOptions_"): continue
213 elif sym
.startswith("getTransliterations_"): continue
214 elif sym
.startswith("getUnicodeScripts_"): continue
215 elif sym
.startswith("lok_"): continue
217 elif sym
.startswith("osl_"): continue
218 elif sym
.startswith("rtl_"): continue
219 elif sym
.startswith("typelib_"): continue
220 elif sym
.startswith("typereg_"): continue
221 elif sym
.startswith("uno_"): continue
222 # remove things we found that do not exist in our source code, they're not ours
223 #if not(extractFunctionNameFromSignature(sym) in all_source_names): continue