don't throw away command output when packaging installsets
[LibreOffice.git] / bin / find-unneeded-includes
blob718ee67a25617d5a0d782e2e932da7c3f09a0991
1 #!/usr/bin/env python3
3 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 # This parses the output of 'include-what-you-use', focusing on just removing
8 # not needed includes and providing a relatively conservative output by
9 # filtering out a number of LibreOffice-specific false positives.
11 # It assumes you have a 'compile_commands.json' around (similar to clang-tidy),
12 # you can generate one with 'make vim-ide-integration'.
14 # Design goals:
15 # - excludelist mechanism, so a warning is either fixed or excluded
16 # - works in a plugins-enabled clang build
17 # - no custom configure options required
18 # - no need to generate a dummy library to build a header
20 import json
21 import multiprocessing
22 import os
23 import queue
24 import re
25 import subprocess
26 import sys
27 import threading
28 import yaml
29 import argparse
30 import pathlib
def ignoreRemoval(include, toAdd, absFileName, moduleRules, noexclude):
    """Tell whether IWYU's proposal to remove `include` should be ignored.

    include      -- the include IWYU wants removed, without <> or "" around it
    toAdd        -- includes / forward declarations IWYU wants added instead
    absFileName  -- path of the checked file; made relative to the current
                    working directory before it is compared with rule keys
    moduleRules  -- rules loaded from the module's IwyuFilter yaml (a dict)
    noexclude    -- when true, the yaml excludelist is not consulted

    Returns True when the removal is a known false positive, False otherwise.
    """
    # ---- global rules ----

    # Avoid replacing .hpp with .hdl in the com::sun::star and ooo::vba namespaces.
    if include.startswith(("com/sun/star", "ooo/vba")) and include.endswith(".hpp"):
        if include.replace(".hpp", ".hdl") in toAdd:
            return True

    # Avoid debug STL: a standard container header must not be swapped for
    # one of its debug-mode implementation headers.
    debugStl = {
        "array": ("debug/array", ),
        "bitset": ("debug/bitset", ),
        "deque": ("debug/deque", ),
        "forward_list": ("debug/forward_list", ),
        "list": ("debug/list", ),
        "map": ("debug/map.h", "debug/multimap.h"),
        "set": ("debug/set.h", "debug/multiset.h"),
        "unordered_map": ("debug/unordered_map", ),
        "unordered_set": ("debug/unordered_set", ),
        "vector": ("debug/vector", ),
    }
    if any(candidate in toAdd for candidate in debugStl.get(include, ())):
        return True

    # Headers for which exactly one proposed replacement marks a false positive.
    singleReplacement = {
        # Avoid proposing to use libstdc++ internal headers.
        "exception": "bits/exception.h",
        "memory": "bits/shared_ptr.h",
        "functional": "bits/std_function.h",
        "cmath": "bits/std_abs.h",
        "ctime": "bits/types/clock_t.h",
        "cstdint": "bits/stdint-uintn.h",
        # Avoid proposing o3tl fw declaration
        "o3tl/typed_flags_set.hxx": "namespace o3tl { template <typename T> struct typed_flags; }",
        "o3tl/deleter.hxx": "namespace o3tl { template <typename T> struct default_delete; }",
        # Follow boost documentation.
        "boost/optional.hpp": "boost/optional/optional.hpp",
        "boost/intrusive_ptr.hpp": "boost/smart_ptr/intrusive_ptr.hpp",
        "boost/shared_ptr.hpp": "boost/smart_ptr/shared_ptr.hpp",
        "boost/variant.hpp": "boost/variant/variant.hpp",
        "boost/unordered_map.hpp": "boost/unordered/unordered_map.hpp",
        "boost/functional/hash.hpp": "boost/container_hash/extensions.hpp",
        # Avoid .hxx to .h proposals in basic css/uno/* API
        "com/sun/star/uno/Any.hxx": "com/sun/star/uno/Any.h",
        "com/sun/star/uno/Reference.hxx": "com/sun/star/uno/Reference.h",
        "com/sun/star/uno/Sequence.hxx": "com/sun/star/uno/Sequence.h",
        "com/sun/star/uno/Type.hxx": "com/sun/star/uno/Type.h",
        # 3rd-party, non-self-contained headers.
        "libepubgen/libepubgen.h": "libepubgen/libepubgen-decls.h",
        "librevenge/librevenge.h": "librevenge/RVNGPropertyList.h",
        "libetonyek/libetonyek.h": "libetonyek/EtonyekDocument.h",
    }
    if include in singleReplacement and singleReplacement[include] in toAdd:
        return True

    neverRemove = (
        # <https://www.openoffice.org/tools/CodingGuidelines.sxw> insists on not
        # removing this.
        "sal/config.h",
        # Works around a build breakage specific to the broken Android
        # toolchain.
        "android/compatibility.hxx",
        # Removing this would change the meaning of '#if defined OSL_BIGENDIAN'.
        "osl/endian.h",
    )
    if include in neverRemove:
        return True

    # Ignore when <foo> is to be replaced with "foo".
    if include in toAdd:
        return True

    relativeName = os.path.relpath(absFileName, os.getcwd())

    # Skip headers used only for compile test
    if relativeName == "cppu/qa/cppumaker/test_cppumaker.cxx" and include.endswith(".hpp"):
        return True

    # ---- yaml rules, except when --noexclude is given ----
    if "excludelist" not in moduleRules or noexclude:
        return False
    perFileExcludes = moduleRules["excludelist"]
    return relativeName in perFileExcludes and include in perFileExcludes[relativeName]
def unwrapInclude(include):
    """Return `include` without its first and last character.

    Used to strip the surrounding <> or "" from an include as printed by IWYU.
    """
    withoutDelimiters = slice(1, -1)
    return include[withoutDelimiters]
# Matches a line of IWYU's full include list whose trailing '// for ...'
# comment names nothing but a namespace.
#
# The list covers all possible URE/UNO namespaces, created with:
# find udkapi/com/sun/star/ -type d | sort| xargs basename -a | tr '\012' '|'
# find offapi/com/sun/star/ -type d | sort | xargs basename -a | tr '\012' '|'
# and ooo::vba namespaces
# plus a few popular ones about other modules
#
# Every literal except the last one must end with '|': adjacent string
# literals are concatenated, so a missing '|' silently fuses two names into
# one alternative that never matches.
_NAMESPACE_ONLY_RE = re.compile(
    r'.*for ('
    # URE namespaces
    'beans|'
    'bridge|oleautomation|'
    'connection|'
    'container|'
    'io|'
    'java|'
    'lang|'
    'loader|'
    'reflection|'
    'registry|'
    'script|'
    'security|'
    'task|'
    'uno|'
    'uri|'
    'util|'
    # UNO namespaces
    'accessibility|'
    'animations|'
    'auth|'
    'awt|tab|tree|grid|'
    'chart|'
    'chart2|data|'
    'configuration|bootstrap|backend|xml|'
    'cui|'
    'datatransfer|clipboard|dnd|'
    'deployment|test|ui|'
    'document|'
    'drawing|framework|'
    'embed|'
    'form|binding|runtime|control|inspection|submission|component|validation|'
    'formula|'
    'frame|status|'
    'gallery|'
    'geometry|'
    'graphic|'
    'i18n|'
    'image|'
    'inspection|'
    'ldap|'
    'linguistic2|'
    'logging|'
    'mail|'
    'media|'
    'mozilla|'
    'office|'
    'packages|zip|manifest|'
    'presentation|textfield|'
    'qa|'
    'rdf|'
    'rendering|'
    'report|inspection|meta|'
    'resource|'
    'scanner|'
    'script|vba|browse|provider|'
    'sdb|application|tools|'
    'sdbc|'
    'sdbcx|'
    'security|'
    'setup|'
    'sheet|opencl|'
    'smarttags|'
    'style|'
    'svg|'
    'system|windows|'
    'table|'
    'task|'
    'text|textfield|docinfo|fieldmaster|'
    'tiledrendering|'
    'ucb|'
    'ui|dialogs|test|'
    'util|'
    'view|'
    'xforms|'
    'xml|xslt|wrapper|csax|sax|input|xpath|dom|views|events|crypto|sax|'
    'xsd|'
    # ooo::vba and its namespaces
    'ooo|vba|excel|powerpoint|adodb|access|office|word|stdole|msforms|dao|'
    # use of module namespaces, as spotted in the code
    'analysis|pricing|' # sca internals
    'apphelper|CloneHelper|DataSeriesProperties|SceneProperties|wrapper|' # for chart internals
    'basegfx|utils|'
    'boost|posix_time|gregorian|'
    'cairo|'
    'canvas|'
    'chelp|'
    'comphelper|'
    'connectivity|'
    'cpp|java|' # for codemaker::
    'cppu|'
    'dbaccess|dbahsql|dbaui|dbtools|'
    'desktop|dp_misc|'
    'drawinglayer|attribute|geometry|primitive2d|processor2d|'
    'editeng|'
    'emscripten|'
    'formula|'
    'framework|'
    'frm|'
    'http_dav_ucp|tdoc_ucp|package_ucp|hierarchy_ucp|gio|fileaccess|ucb_impl|hcp_impl|ucb_cmdenv|' # for ucb internal
    'i18npool|'
    'internal|ColorComponentTag|' # for slideshow internals
    'jfw_plugin|'
    'jni_uno|'
    'librevenge|'
    'linguistic|'
    'lok|'
    'mtv|' # for mdds::mtv
    'nsSwDocInfoSubType|SWUnoHelper|nsHdFtFlags|' # sw internal
    'o3tl|'
    'odfflatxml|' # filter internal
    'oox|core|drawingml|ole|vml|'
    'OpenStormBento|'
    'osl|'
    'pdfi|pdfparse|'
    'ppt|'
    'pyuno|'
    'reportdesign|'
    'rptui|'
    'rtl|math|textenc|'
    'salhelper|'
    'sax_fastparser|'
    'sax|' # for xml::sax
    'sc|'
    'SchXMLTools|' # for xmloff
    'sd|slidesorter|cache|controller|model|view|'
    'sf_misc|'
    'sfx2|DocTempl|'
    'sidebar|' # for sfx2::sidebar
    'skeletonmaker|'
    'star|' # for com::sun::star
    'std|chrono_literals|literals|'
    'stoc_sec|'
    'store|'
    'svl|impl|'
    'svt|'
    'svtools|'
    'svx|sdr|contact|table|'
    'sw|access|annotation|mark|types|util|'
    'toolkit|'
    'treeview|'
    'ucbhelper|'
    'unodevtools|'
    'unopkg|'
    'util|db|qe|' # for xmlsearch::
    'utl|'
    'vcl|psp|x11|'
    'writerfilter|'
    'xforms|'
    'xmloff|token|EnhancedCustomShapeToken|' # for xmloff::
    'ZipUtils'
    ')$'
)


def processIWYUOutput(iwyuOutput, moduleRules, fileName, noexclude, checknamespaces):
    """Parse the output of one include-what-you-use run and report findings.

    iwyuOutput      -- iterable of output lines (stdout and stderr combined)
    moduleRules     -- rules loaded from the module's IwyuFilter yaml
    fileName        -- the checked file, spelled as IWYU prints it in its
                       '... should add/remove these lines:' headers
    noexclude       -- when true, the yaml excludelist is ignored
    checknamespaces -- when true, report likely unneeded 'using namespace'
                       statements instead of removable includes

    Prints one 'ERROR: ...' line per removable include (or 'WARNING: ...'
    lines in namespace mode, each followed by a 'git grep' for the statement).

    Returns -1 if IWYU reported a compiler error, otherwise the number of
    removable includes found (always 0 in namespace mode).
    """
    inAdd = False
    toAdd = []
    inRemove = False
    toRemove = []
    inFull = False
    currentFileName = None

    # Section headers are compared as literal text: a file name may contain
    # regular expression metacharacters ('.', '+', '(' ...), and using it as
    # a pattern would mismatch or even raise re.error (e.g. for '.../c++/...').
    addHeader = fileName + " should add these lines:"
    removeHeader = fileName + " should remove these lines:"
    fullHeader = "The full include-list for " + fileName

    for line in iwyuOutput:
        line = line.strip()

        # Bail out if IWYU gave an error due to non self-containedness
        if re.match("(.*): error: (.*)", line):
            return -1

        # An empty line terminates whichever section is currently open.
        if not line:
            if inRemove:
                inRemove = False
                continue
            if inAdd:
                inAdd = False
                continue
            if inFull:
                inFull = False
                continue

        if line.startswith(addHeader):
            currentFileName = fileName
            inAdd = True
            continue

        if line.startswith(removeHeader):
            currentFileName = fileName
            inRemove = True
            continue

        if checknamespaces and line.startswith(fullHeader):
            inFull = True
            continue

        if inAdd:
            match = re.match('#include ([^ ]+)', line)
            if match:
                toAdd.append(unwrapInclude(match.group(1)))
            else:
                # Forward declaration.
                toAdd.append(line)

        if inRemove and not checknamespaces:
            match = re.match("- #include (.*)  // lines (.*)-.*", line)
            if match:
                # Only suggest removals for now. Removing fwd decls is more complex: they may be
                # indeed unused or they may be removed to be replaced with an include. And we
                # want to avoid the latter.
                include = unwrapInclude(match.group(1))
                lineno = match.group(2)
                if not ignoreRemoval(include, toAdd, currentFileName, moduleRules, noexclude):
                    toRemove.append("%s:%s: %s" % (currentFileName, lineno, include))

        # inFull is only ever set in namespace mode.
        if inFull:
            reason = _NAMESPACE_ONLY_RE.match(line)
            if reason:
                # Warn about namespaces: if a header is suggested only '// for $namespace', then the namespace is not used
                # otherwise the used classes name would show up after the '// for'
                # Cleaning out the respective header (if there is any
                # - which is not always the case) is for the next run!
                nameSpace = reason.group(1)
                print("WARNING:", fileName, "This 'using namespace' is likely unnecessary:", nameSpace)

                # Get the row number, normal IWYU output does not contain this info
                subprocess.run(["git", "grep", "-n", "namespace.*[^a-zA-Z]"+nameSpace+" *;", fileName])

    for remove in sorted(toRemove):
        print("ERROR: %s: remove not needed include" % remove)
    return len(toRemove)
def run_tool(task_queue, failed_files, dontstop, noexclude, checknamespaces):
    """Worker loop: run IWYU invocations taken from `task_queue`, forever.

    task_queue      -- queue of (invocation, moduleRules) tuples; every item
                       taken is acknowledged with task_done()
    failed_files    -- shared list; an invocation is appended when it found
                       removable includes and `dontstop` is false. Once the
                       list is non-empty, remaining tasks are drained without
                       being run.
    dontstop        -- keep checking further files after a finding
    noexclude       -- passed through to processIWYUOutput
    checknamespaces -- passed through to processIWYUOutput

    The function never returns; it is meant to run in a daemon thread.
    """
    while True:
        invocation, moduleRules = task_queue.get()
        try:
            if not failed_files:
                fileName = invocation.split(' ')[-1]
                print("[IWYU] " + fileName)
                result = subprocess.run(invocation, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                # Compiler output is not guaranteed to be valid UTF-8; a
                # decoding error must not kill the worker.
                output = result.stdout.decode('utf-8', errors='replace').splitlines()
                retcode = processIWYUOutput(output, moduleRules, fileName, noexclude, checknamespaces)
                if retcode == -1 and not checknamespaces:
                    print("ERROR: A file is probably not self contained, check this commands output:\n" + invocation)
                elif retcode > 0:
                    print("ERROR: The following command found unused includes:\n" + invocation)
                    if not dontstop:
                        failed_files.append(invocation)
        except Exception as e:
            # Keep the worker alive: a dead worker would stop consuming tasks
            # and could leave the producer blocked on the bounded queue.
            print("ERROR: Unexpected failure (%s: %s) while running:\n%s" % (type(e).__name__, e, invocation))
            if not dontstop:
                failed_files.append(invocation)
        finally:
            # Always acknowledge the task, otherwise task_queue.join() in the
            # caller would wait forever.
            task_queue.task_done()
    # NOTE(review): the loop above never terminates, so this is unreachable.
    # The terminal reset can only take effect if the caller runs it once the
    # queue has been drained.
    if checknamespaces:
        # Workaround: sometimes running git grep makes the letters typed into the terminal disappear after the script is finished
        os.system('stty sane')
def isInUnoIncludeFile(path):
    """Return True if `path` lies in one of the listed include/ subdirectories.

    `path` is compared textually, so it has to be relative to the source
    root and use '/' as separator.
    """
    unoIncludeDirs = (
        "include/com/",
        "include/cppu/",
        "include/cppuhelper/",
        "include/osl/",
        "include/rtl/",
        "include/sal/",
        "include/salhelper/",
        "include/systools/",
        "include/typelib/",
        "include/uno/",
    )
    return path.startswith(unoIncludeDirs)
def tidy(compileCommands, paths, dontstop, noexclude,checknamespaces):
    """Run include-what-you-use on `paths` in parallel, then exit the process.

    compileCommands -- parsed compile_commands.json: a list of dicts with at
                       least the keys "file" and "command"
    paths           -- files to check, relative to the source root
    dontstop        -- keep checking further files after a finding
    noexclude       -- ignore the yaml excludelist
    checknamespaces -- look for unneeded 'using namespace' statements instead
                       of removable includes

    Does not return: calls sys.exit() with status 1 if a worker recorded a
    failed file, 0 otherwise.
    """
    return_code = 0

    try:
        max_task = multiprocessing.cpu_count()
        task_queue = queue.Queue(max_task)
        failed_files = []
        for _ in range(max_task):
            t = threading.Thread(target=run_tool, args=(task_queue, failed_files, dontstop, noexclude, checknamespaces))
            t.daemon = True
            t.start()

        # Index the compile commands once instead of scanning the whole list
        # for every path; setdefault keeps the first entry of each file.
        commandsByFile = {}
        for entry in compileCommands:
            commandsByFile.setdefault(entry["file"], entry)

        # Every file of a module shares one rule file, so parse it only once.
        rulesByPath = {}

        for path in sorted(paths):
            if isInUnoIncludeFile(path):
                continue

            # IWYU fails on these with #error: don't use this in new code
            if path.startswith("include/vcl/toolkit"):
                continue

            moduleName = path.split("/")[0]

            rulePath = os.path.join(moduleName, "IwyuFilter_" + moduleName + ".yaml")
            if rulePath not in rulesByPath:
                loadedRules = {}
                if os.path.exists(rulePath):
                    with open(rulePath) as ruleFile:
                        # An empty yaml document loads as None.
                        loadedRules = yaml.full_load(ruleFile) or {}
                rulesByPath[rulePath] = loadedRules
            moduleRules = rulesByPath[rulePath]

            assume = None
            assumeAbs = None
            pathAbs = os.path.abspath(path)
            command = commandsByFile.get(pathAbs)
            if command is None:
                # Only use assume-filename for headers, so we don't try to analyze e.g. Windows-only
                # code on Linux.
                if not path.endswith("cxx"):
                    assume = moduleRules.get("assumeFilename")
                if not assume:
                    print("WARNING: no compile commands for '" + path + "'")
                    continue
                assumeAbs = os.path.abspath(assume)
                command = commandsByFile.get(assumeAbs)
                if command is None:
                    print("WARNING: no compile commands for '" + path + "' (assumed filename: '" + assume + "')")
                    continue

            # Drop the compiler executable, keep its arguments.
            _, _, args = command["command"].partition(" ")
            if assume:
                # Reuse the flags of the assumed file, but compile the header itself.
                args = args.replace(assumeAbs, "-x c++ " + pathAbs)

            invocation = "include-what-you-use -Xiwyu --no_fwd_decls -Xiwyu --max_line_length=200 " + args
            task_queue.put((invocation, moduleRules))

        task_queue.join()
        if failed_files:
            return_code = 1

        if checknamespaces and sys.stdin.isatty():
            # Workaround: sometimes running git grep makes the letters typed into the terminal disappear after the script is finished
            os.system('stty sane')

    except KeyboardInterrupt:
        print('\nCtrl-C detected, goodbye.')
        os.kill(0, 9)

    sys.exit(return_code)
def main(argv):
    """Command line entry point.

    argv -- the command line arguments without the program name

    Prints the help text and returns when `argv` is empty. Otherwise
    collects the files to check, loads compile_commands.json from the current
    directory, warns about excludelist entries whose file no longer exists,
    and hands over to tidy(), which exits the process.

    Exits with status -1 if compile_commands.json is missing and with -2 if
    there is nothing to check.
    """
    parser = argparse.ArgumentParser(description='Check source files for unneeded includes.')
    parser.add_argument('--continue', action='store_true',
                    help='Don\'t stop on errors. Useful for periodic re-check of large amount of files')
    parser.add_argument('Files' , nargs='*',
                    help='The files to be checked')
    parser.add_argument('--recursive', metavar='DIR', nargs=1, type=str,
                    help='Recursively search a directory for source files to check')
    parser.add_argument('--headers', action='store_true',
                    help='Check header files. If omitted, check source files. Use with --recursive.')
    parser.add_argument('--noexclude', action='store_true',
                    help='Ignore excludelist. Useful to check whether its exclusions are still all valid.')
    parser.add_argument('--ns', action='store_true',
                    help='Warn about unused "using namespace" statements. '
                         'Removing these may uncover more removable headers '
                         'in a subsequent normal run')

    # Parse the arguments we were given rather than implicitly re-reading
    # sys.argv.
    args = parser.parse_args(argv)

    if not argv:
        parser.print_help()
        return

    list_of_files = []
    if args.recursive:
        if args.headers:
            wantedSuffixes = (".hxx", ".hrc", ".h")
        else:
            wantedSuffixes = (".cxx", ".c")
        for root, _dirs, files in os.walk(args.recursive[0]):
            for name in files:
                if name.endswith(wantedSuffixes):
                    list_of_files.append(os.path.join(root, name))
    else:
        list_of_files = args.Files

    try:
        with open("compile_commands.json", 'r') as compileCommandsSock:
            compileCommands = json.load(compileCommandsSock)
    except FileNotFoundError:
        print ("File 'compile_commands.json' does not exist, please run:\nmake vim-ide-integration")
        sys.exit(-1)

    # Verify there are files selected for checking, with --recursive it
    # may happen that there are in fact no C/C++ files in a module directory
    if not list_of_files:
        print("No files found to check!")
        sys.exit(-2)

    # Quick sanity check whether the files with exceptions in the yaml still
    # exist; only done for the module of the very first (sorted) filename.
    moduleName = min(list_of_files).split("/")[0]
    rulePath = os.path.join(moduleName, "IwyuFilter_" + moduleName + ".yaml")
    moduleRules = {}
    if os.path.exists(rulePath):
        with open(rulePath) as ruleFile:
            # An empty yaml document loads as None.
            moduleRules = yaml.full_load(ruleFile) or {}
    # 'excludelist:' without entries loads as None as well.
    excludelistRules = moduleRules.get("excludelist") or {}
    for pathname in excludelistRules:
        if not pathlib.Path(pathname).exists():
            print("WARNING: File listed in " + rulePath + " no longer exists: " + pathname)

    tidy(compileCommands, paths=list_of_files, dontstop=vars(args)["continue"], noexclude=args.noexclude, checknamespaces=args.ns)
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv[1:])
567 # vim:set shiftwidth=4 softtabstop=4 expandtab: