Version 6.4.0.3, tag libreoffice-6.4.0.3
[LibreOffice.git] / bin / find-can-be-private-symbols.py
blob0ff17072361a601b2611449fed5e2a68ecc40c5d
1 #!/usr/bin/python
3 # Find exported symbols that can be made non-exported.
5 # Noting that (a) parsing these commands is a pain, the output is quite irregular and (b) I'm fumbling in the
6 # dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol, linux linking
7 # is rather complex.
9 # Takes about 5min to run on a decent machine.
11 # The standalone function analysis is reasonably reliable, but the class/method analysis is less so
12 # (something to do with destructor thunks not showing up in my results?)
14 # Also, the class/method analysis will not catch problems like
15 # 'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
16 # but loplugin:dyncastvisibility will do that for you
19 import subprocess
20 import sys
21 import re
# Names that appear as exported text-section ("T") entries in some shared library.
exported_symbols = set()
# Names that some shared library or executable lists as undefined, i.e. needs from elsewhere.
imported_symbols = set()

# For the nm output we are looking for lines something like:
#   0000000000036ed0 T flash_component_getFactory
# Compiled once here rather than once per library.
line_regex = re.compile(r'^[0-9a-fA-F]+ T ')

# The globs are quoted so the shell hands them to find unexpanded, even when the current
# directory happens to contain a matching file.
# universal_newlines=True makes the pipes yield text on both Python 2 and Python 3, so the
# paths and symbols can be used with ordinary string operations.
subprocess_find = subprocess.Popen(
    "find ./instdir -name '*.so' && find ./workdir/LinkTarget/CppunitTest -name '*.so'",
    stdout=subprocess.PIPE, shell=True, universal_newlines=True)
with subprocess_find.stdout as txt:
    for line in txt:
        sharedlib = line.strip()
        if not sharedlib:
            continue

        # look for exported symbols
        # (argument list instead of a shell string, so odd characters in the path are harmless)
        subprocess_nm = subprocess.Popen(
            ["nm", "-D", sharedlib],
            stdout=subprocess.PIPE, universal_newlines=True)
        with subprocess_nm.stdout as txt2:
            for line2 in txt2:
                line2 = line2.strip()
                if line_regex.match(line2):
                    # fields are: address, type letter, symbol name
                    exported_symbols.add(line2.split(" ")[2])
        subprocess_nm.wait()

        # look for imported symbols
        subprocess_objdump = subprocess.Popen(
            ["objdump", "-T", sharedlib],
            stdout=subprocess.PIPE, universal_newlines=True)
        with subprocess_objdump.stdout as txt2:
            # ignore the four lines of header bumf
            for _ in range(4):
                txt2.readline()
            # We are looking for lines something like:
            #   0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
            # NOTE(review): the "*UND*" marker is expected in field 7 when splitting on single
            # spaces - presumably because the real output is column-padded; confirm against
            # the objdump version in use.
            for line2 in txt2:
                tokens = line2.strip().split(" ")
                # Index 7 is only valid when there are at least 8 fields.
                if len(tokens) < 8 or not tokens[7].startswith("*UND*"):
                    continue
                # the symbol name is the last field on the line
                imported_symbols.add(tokens[-1])
        subprocess_objdump.wait()
subprocess_find.terminate()
# look for imported symbols in executables
# The glob is quoted so the shell hands it to find unexpanded.
# universal_newlines=True makes the pipes yield text on both Python 2 and Python 3.
subprocess_find = subprocess.Popen(
    "find ./instdir -name '*.bin'",
    stdout=subprocess.PIPE, shell=True, universal_newlines=True)
with subprocess_find.stdout as txt:
    for line in txt:
        executable = line.strip()
        if not executable:
            continue
        # List the dynamic symbols and keep only the undefined ("U") ones; the filtering is
        # done here instead of piping through grep, so no shell command has to be assembled
        # from the file name.
        subprocess_nm = subprocess.Popen(
            ["nm", "-D", executable],
            stdout=subprocess.PIPE, universal_newlines=True)
        with subprocess_nm.stdout as txt2:
            # We are looking for lines something like:
            #   U sal_detail_deinitialize
            for line2 in txt2:
                tokens = line2.split()
                if len(tokens) == 2 and tokens[0] == "U":
                    imported_symbols.add(tokens[1])
        subprocess_nm.wait()
subprocess_find.terminate()
# Symbols that are exported by some library but that nothing imports.
diff = exported_symbols.difference(imported_symbols)

# Print the size of each symbol set as a quick summary.
for label, symbols in (
        ("exported = ", exported_symbols),
        ("imported = ", imported_symbols),
        ("diff = ", diff)):
    print(label + str(len(symbols)))
# standalone functions that are exported but not imported
unused_function_exports = set()
# scopes (text before the last "::" of the demangled name) seen on exported / imported symbols
classes_with_exported_symbols = set()
classes_with_imported_symbols = set()

# c++filt decorations that are stripped so a thunk is attributed to the method it forwards to
_THUNK_PREFIXES = ("non-virtual thunk to ", "virtual thunk to ")


def _demangle(sym):
    """Return the c++filt output for sym, with a leading thunk decoration removed."""
    # universal_newlines=True gives text on both Python 2 and Python 3, so the string
    # comparisons below work on either.
    filtered_sym = subprocess.check_output(["c++filt", sym], universal_newlines=True).strip()
    for prefix in _THUNK_PREFIXES:
        if filtered_sym.startswith(prefix):
            return filtered_sym[len(prefix):]
    return filtered_sym


def _enclosing_scope(filtered_sym):
    """Return the text before the last "::" that precedes the first "(", or None.

    None means no "::" was found, i.e. the name is treated as a standalone function.
    """
    i = filtered_sym.find("(")
    # When there is no "(", i is -1 and the search simply covers everything but the
    # final character.
    i = filtered_sym.rfind("::", 0, i)
    if i == -1:
        return None
    return filtered_sym[:i]


for sym in exported_symbols:
    filtered_sym = _demangle(sym)
    classname = _enclosing_scope(filtered_sym)
    if classname is not None:
        # find classes where all of the exported symbols are not imported
        classes_with_exported_symbols.add(classname)
    elif sym not in imported_symbols:
        # find standalone functions which are exported but not imported
        unused_function_exports.add(filtered_sym)

for sym in imported_symbols:
    classname = _enclosing_scope(_demangle(sym))
    if classname is not None:
        classes_with_imported_symbols.add(classname)
# Filter out most of the noise.
# No idea where these are coming from, but not our code.
_noise_prefixes = (
    "CERT_", "DER_", "FORM_", "FPDF", "HASH_", "Hunspell_", "LL_", "LP_",
    "LU", "MIP", "MPS", "NSS", "NSC_", "PK11", "PL_", "PQ", "PBE_", "PORT_",
    "PRP_", "PR_", "PT_", "QS_", "REPORT_", "RSA_", "SEC", "SGN", "SOS",
    "SSL_", "VFY_", "_PR_", "_", "ber_", "bfp_", "ldap_", "ne_", "opj_",
    "pg_", "pq", "presolve_", "sqlite3_",
)

# dynamically loaded
_dynamic_suffixes = ("get_implementation", "component_getFactory")
_dynamic_names = (
    "CreateDialogFactory",
    "CreateUnoWrapper",
    "CreateWindow",
    "ExportDOC",
    "ExportPPT",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
)
_dynamic_prefixes = (
    "Import",
    "Java_com_sun_star_",
    "TestImport",
    "getAllCalendars_",
    "getAllCurrencies_",
    "getAllFormats",
    "getBreakIteratorRules_",
    "getCollationOptions_",
    "getCollatorImplementation_",
    "getContinuousNumberingLevels_",
    "getDateAcceptancePatterns_",
    "getForbiddenCharacters_",
    "getIndexAlgorithm_",
    "getLCInfo_",
    "getLocaleItem_",
    "getOutlineNumberingLevels_",
    "getReservedWords_",
    "getSTC_",
    "getSearchOptions_",
    "getTransliterations_",
    "getUnicodeScripts_",
    "lok_",
)

# UDK API
_udk_prefixes = ("osl_", "rtl_", "typelib_", "typereg_", "uno_")

# Write every unused standalone function that survives the filters, one per line, sorted.
with open("bin/find-can-be-private-symbols.functions.results", "wt") as f:
    for function_name in sorted(unused_function_exports):
        filtered_out = (
            function_name.startswith(_noise_prefixes)
            or function_name.endswith(_dynamic_suffixes)
            or function_name in _dynamic_names
            or function_name.startswith(_dynamic_prefixes)
            or function_name.startswith(_udk_prefixes)
        )
        if not filtered_out:
            f.write(function_name + "\n")
# externals
_external_prefixes = (
    "libcdr",
    "libabw",
    "libebook",
    "libepubgen",
    "libfreehand",
    "libmspub",
    "libpagemaker",
    "libqxp",
    "libvisio",
    "libzmf",
    "lucene::",
)

# Write every class that has exported symbols but no imported ones, one per line, sorted,
# leaving out the ones that belong to external libraries.
with open("bin/find-can-be-private-symbols.classes.results", "wt") as f:
    candidates = classes_with_exported_symbols - classes_with_imported_symbols
    for class_name in sorted(candidates):
        if not class_name.startswith(_external_prefixes):
            f.write(class_name + "\n")