# Find exported symbols that can be made non-exported.
#
# Note that (a) parsing the output of these commands is a pain, because it is quite irregular, and (b) I'm fumbling in the
# dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol in Linux linking.
#
# Takes about 5min to run on a decent machine.
#
# The standalone function analysis is reasonably reliable, but the class/method analysis is less so
# (something to do with destructor thunks not showing up in my results?)
#
# Also, the class/method analysis will not catch problems like
#    'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
# but loplugin:dyncastvisibility will do that for you.
# Symbols that `nm -D` reports with type T for some shared library.
exported_symbols = set()
# Symbols that some shared library or executable lists as undefined.
imported_symbols = set()
# standalone functions that are exported but not imported
unused_function_exports = set()
# Class qualifiers that own at least one exported symbol.
classes_with_exported_symbols = set()
# Class qualifiers that own at least one imported symbol.
classes_with_imported_symbols = set()
# all names that exist in the source code
all_source_names = set()
# Collect every word of three or more word characters that git grep finds in
# files matching '*.h*'. The result is used to discard symbols whose names do
# not occur in our own headers.
subprocess_find_all_source_names = subprocess.Popen(
    "git grep -oh -P '\\b\\w\\w\\w+\\b' -- '*.h*' | sort -u",
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    shell=True,
)
with subprocess_find_all_source_names.stdout as txt:
    for line in txt:
        # The pipe is opened in binary mode, so each line is bytes. Decode it,
        # otherwise the str function names tested against this set never match.
        all_source_names.add(line.strip().decode("utf-8", errors="replace"))
subprocess_find_all_source_names.terminate()
# find all our shared libs
# The glob is quoted so that the shell hands it to find unexpanded.
subprocess_find = subprocess.Popen(
    "find ./instdir -name '*.so' && find ./workdir/LinkTarget/CppunitTest -name '*.so'",
    stdout=subprocess.PIPE,
    shell=True,
)
# We are looking for nm lines something like:
# 0000000000036ed0 T flash_component_getFactory
# Compiled once here rather than once per library.
line_regex = re.compile(r'^[0-9a-fA-F]+ T ')
with subprocess_find.stdout as txt:
    for line in txt:
        # Kept as bytes: it is spliced into the bytes command lines below.
        sharedlib = line.strip()

        # look for exported symbols
        subprocess_nm = subprocess.Popen(b"nm -D " + sharedlib, stdout=subprocess.PIPE, shell=True)
        with subprocess_nm.stdout as txt2:
            for line2_bytes in txt2:
                line2 = line2_bytes.strip().decode("utf-8")
                if line_regex.match(line2):
                    # "<address> T <symbol>": the symbol is the third field.
                    sym = line2.split(" ")[2]
                    exported_symbols.add(sym)
        subprocess_nm.terminate()

        # look for imported symbols
        subprocess_objdump = subprocess.Popen(b"objdump -T " + sharedlib, stdout=subprocess.PIPE, shell=True)
        with subprocess_objdump.stdout as txt2:
            # We are looking for lines something like:
            # 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
            # Only lines carrying the *UND* marker are used, so any other
            # output (header lines included) is skipped by the same test.
            for line2_bytes in txt2:
                line2 = line2_bytes.strip().decode("utf-8")
                if "*UND*" not in line2:
                    continue
                tokens = line2.split(" ")
                # The symbol name is the last field on the line.
                sym = tokens[-1]
                imported_symbols.add(sym)
        subprocess_objdump.terminate()
subprocess_find.terminate()
# look for imported symbols in executables
# The glob is quoted so that the shell hands it to find unexpanded.
subprocess_find = subprocess.Popen("find ./instdir -name '*.bin'", stdout=subprocess.PIPE, shell=True)
with subprocess_find.stdout as txt:
    for line in txt:
        # Kept as bytes: it is spliced into the bytes command line below.
        executable = line.strip()
        # look for undefined (i.e. imported) symbols
        subprocess_nm = subprocess.Popen(
            b"nm -D " + executable + b" | grep -w U",
            stdout=subprocess.PIPE,
            shell=True,
        )
        with subprocess_nm.stdout as txt2:
            # We are looking for lines something like:
            # U sal_detail_deinitialize
            for line2_bytes in txt2:
                line2 = line2_bytes.strip().decode("utf-8")
                # After stripping, the symbol is the field following the "U".
                sym = line2.split(" ")[1]
                imported_symbols.add(sym)
        # Release the child the same way the shared-library scan does.
        subprocess_nm.terminate()
subprocess_find.terminate()
# Symbols that are exported somewhere but not imported by anything scanned.
diff = exported_symbols.difference(imported_symbols)

# Print the size of each set as a quick summary.
exported_count = len(exported_symbols)
imported_count = len(imported_symbols)
diff_count = len(diff)
print("exported = " + str(exported_count))
print("imported = " + str(imported_count))
print("diff = " + str(diff_count))
def _strip_thunk_prefix(demangled):
    """Return *demangled* without a leading "[non-]virtual thunk to " marker."""
    for prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if demangled.startswith(prefix):
            return demangled[len(prefix):]
    return demangled


def _class_qualifier(demangled):
    """Return the scope in front of the last '::' that precedes the parameter list.

    Returns None when there is no '::' before the first '(' (or anywhere in
    the name, if it has no '(' at all).
    """
    paren = demangled.find("(")
    if paren == -1:
        # No parameter list: search the whole name instead of passing -1 as
        # an end index, which would silently cut off the last character.
        scope_end = demangled.rfind("::")
    else:
        scope_end = demangled.rfind("::", 0, paren)
    if scope_end == -1:
        return None
    return demangled[:scope_end]


def _demangle(sym):
    """Run c++filt on one symbol and return the result without any thunk marker."""
    demangled = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8")
    return _strip_thunk_prefix(demangled)


# Classify every exported symbol as either a class member or a standalone function.
progress = 0
for sym in sorted(exported_symbols):
    progress += 1
    # One c++filt process is spawned per symbol, so report progress now and then.
    if progress % 128 == 0:
        print(str(int(progress * 100 / len(exported_symbols))) + "%")
    filtered_sym = _demangle(sym)
    classname = _class_qualifier(filtered_sym)
    if classname is not None:
        # find classes where all of the exported symbols are not imported
        classes_with_exported_symbols.add(classname)
    else:
        func = filtered_sym
        # find standalone functions which are exported but not imported
        if sym not in imported_symbols:
            unused_function_exports.add(func)

# Record the class qualifier of every imported symbol that has one.
progress = 0
for sym in sorted(imported_symbols):
    progress += 1
    if progress % 128 == 0:
        print(str(int(progress * 100 / len(imported_symbols))) + "%")
    filtered_sym = _demangle(sym)
    classname = _class_qualifier(filtered_sym)
    if classname is not None:
        classes_with_imported_symbols.add(classname)
def extractFunctionNameFromSignature(sym):
    """Return the part of *sym* that precedes its parameter list.

    *sym* is a function signature such as ``foo(int)``. Everything from the
    first ``(`` onwards is dropped. A string without ``(`` is returned
    unchanged.
    """
    # partition() yields the whole string as the head when "(" is absent.
    return sym.partition("(")[0]
# Filter out most of the noise.
# Symbols starting with any of these are skipped. The first group (up to
# "sqlite3_") is noted in the original as: no idea where these are coming
# from, but not our code.
_IGNORED_FUNCTION_PREFIXES = (
    "CERT_", "DER_", "FORM_", "FPDF", "HASH_", "Hunspell_", "LL_", "LP_",
    "LU", "MIP", "MPS", "NSS", "NSC_", "PK11", "PL_", "PQ", "PBE_", "PORT_",
    "PRP_", "PR_", "PT_", "QS_", "REPORT_", "RSA_", "SEC", "SGN", "SOS",
    "SSL_", "VFY_", "_PR_", "_", "ber_", "bfp_", "ldap_", "ne_", "opj_",
    "pg_", "pq", "presolve_", "sqlite3_",
    "Import", "Java_com_sun_star_", "TestImport",
    "getAllCalendars_", "getAllCurrencies_", "getAllFormats",
    "getBreakIteratorRules_", "getCollationOptions_",
    "getCollatorImplementation_", "getContinuousNumberingLevels_",
    "getDateAcceptancePatterns_", "getForbiddenCharacters_",
    "getIndexAlgorithm_", "getLCInfo_", "getLocaleItem_",
    "getOutlineNumberingLevels_", "getReservedWords_", "getSTC_",
    "getSearchOptions_", "getTransliterations_", "getUnicodeScripts_",
    "lok_",
    "osl_", "rtl_", "typelib_", "typereg_", "uno_",
)
# Symbols ending with any of these are skipped.
_IGNORED_FUNCTION_SUFFIXES = ("get_implementation", "component_getFactory")
# Symbols with exactly these names are skipped.
_IGNORED_FUNCTION_NAMES = frozenset((
    "CreateDialogFactory",
    "CreateUnoWrapper",
    "CreateWindow",
    "ExportDOC",
    "ExportPPT",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
))

# Write the standalone functions that survive the filters, one per line.
with open("bin/find-can-be-private-symbols.functions.results", "wt") as f:
    for sym in sorted(unused_function_exports):
        # str.startswith / str.endswith accept a tuple of alternatives.
        if sym.startswith(_IGNORED_FUNCTION_PREFIXES):
            continue
        if sym.endswith(_IGNORED_FUNCTION_SUFFIXES):
            continue
        if sym in _IGNORED_FUNCTION_NAMES:
            continue
        # remove things we found that do not exist in our source code, they're not ours
        if extractFunctionNameFromSignature(sym) not in all_source_names:
            continue
        f.write(sym + "\n")
220 with
open("bin/find-can-be-private-symbols.classes.results", "wt") as f
:
221 for sym
in sorted(classes_with_exported_symbols
- classes_with_imported_symbols
):
223 if sym
.startswith("libcdr"): continue
224 elif sym
.startswith("libabw"): continue
225 elif sym
.startswith("libebook"): continue
226 elif sym
.startswith("libepubgen"): continue
227 elif sym
.startswith("libfreehand"): continue
228 elif sym
.startswith("libmspub"): continue
229 elif sym
.startswith("libpagemaker"): continue
230 elif sym
.startswith("libqxp"): continue
231 elif sym
.startswith("libvisio"): continue
232 elif sym
.startswith("libzmf"): continue
233 elif sym
.startswith("lucene::"): continue
234 elif sym
.startswith("Sk"): continue