1 # -*- coding: utf-8 -*-
2 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 # See https://llvm.org/LICENSE.txt for license information.
4 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """
# Public names of this module (what `from <module> import *` exposes).
__all__ = ["split_command", "classify_source", "compiler_language"]
# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only parameters
# which are ignored: `split_command` also drops `-l...`, `-L...` and
# `-Wl,...` arguments.
#
# Keys are the option names; values are the number of following arguments
# to skip together with the option.
20 # compiling only flag, ignored because the creator of compilation
21 # database will explicitly set it.
# preprocessor macros, ignored because they would cause duplicate entries in
# the output (the only difference would be these flags). This is an actual
# finding from users, who suffered longer execution time caused by the
# duplicate entries.
# linker options, ignored because the compilation database will contain
# compilation commands only, so the compiler would ignore these flags
# anyway. The benefit of getting rid of them is to make the output more
# readable.
# Known C/C++ compiler executable name patterns.
# Each pattern is matched against the basename of the invoked program.
COMPILER_PATTERNS = frozenset(
    [
        # `cc` / `c++`, optionally prefixed with `intercept-` or `analyze-`
        re.compile(r"^(intercept-|analyze-|)c(c|\+\+)$"),
        # `gcc` / `g++` / `mcc` / `m++`, with optional dash-terminated
        # prefixes and an optional `-N[.N[.N]]` version suffix
        re.compile(r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$"),
        # `clang` / `clang++`, same optional prefixes and version suffix
        re.compile(r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$"),
        # `llvm-gcc` / `llvm-g++`
        re.compile(r"^llvm-g(cc|\+\+)$"),
    ]
)
def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    command: the argument vector of the invocation; the first element is
             the program, the rest are its parameters.

    The value on success is a named tuple type with the following attributes:

    files:    list of source files
    flags:    list of compile options
    compiler: string value of 'c' or 'c++'

    None is returned when the program is not a recognised C/C++ compiler,
    when a parameter shows that no compilation pass is involved, or when no
    source file was found among the parameters.

    An option which expects a value but happens to be the last parameter
    does not raise: the missing value is simply treated as absent."""

    # the result of this method
    # NOTE: the attributes are assigned on the freshly created namedtuple
    # type itself; callers read them as plain attributes.
    result = collections.namedtuple("Compilation", ["compiler", "flags", "files"])
    result.compiler = compiler_language(command)
    result.flags = []
    result.files = []
    # quit right now, if the program was not a C/C++ compiler
    if not result.compiler:
        return None
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {"-E", "-S", "-cc1", "-M", "-MM", "-###"}:
            return None
        # ignore some flags, together with the values they consume
        elif arg in IGNORED_FLAGS:
            count = IGNORED_FLAGS[arg]
            for _ in range(count):
                # default given, so a truncated command line does not leak
                # StopIteration to the caller
                next(args, None)
        # linker related parameters written as a single word are dropped
        elif re.match(r"^-(l|L|Wl,).+", arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {"-D", "-I"}:
            result.flags.append(arg)
            value = next(args, None)
            # the value is missing when the option was the last parameter
            if value is not None:
                result.flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r"^[^-].+", arg) and classify_source(arg):
            result.files.append(arg)
        # and consider everything else as compile option.
        else:
            result.flags.append(arg)
    # do extra check on number of source files
    return result if result.files else None
def classify_source(filename, c_compiler=True):
    """Return the language from file name extension.

    filename:   name or path of the file; only the extension of its last
                path component is looked at (case sensitive).
    c_compiler: when true, the ambiguous '.c' and '.i' extensions are taken
                as C, otherwise as C++.

    Returns the language name as string, or None when the extension is not
    a known source file extension."""

    mapping = {
        ".c": "c" if c_compiler else "c++",
        ".i": "c-cpp-output" if c_compiler else "c++-cpp-output",
        ".ii": "c++-cpp-output",
        ".m": "objective-c",
        ".mi": "objective-c-cpp-output",
        ".mm": "objective-c++",
        ".mii": "objective-c++-cpp-output",
        ".C": "c++",
        ".cc": "c++",
        ".CC": "c++",
        ".cp": "c++",
        ".cpp": "c++",
        ".cxx": "c++",
        ".c++": "c++",
        ".C++": "c++",
        ".txx": "c++",
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)
def compiler_language(command):
    """A predicate to decide the command is a compiler call or not.

    command: the argument vector of the invocation; only the basename of
             its first element is examined.

    Returns 'c' or 'c++' when it match. None otherwise (an empty command
    is never a compiler call)."""

    # nothing to inspect, avoid indexing into an empty command
    if not command:
        return None

    # an executable name with '++' in it (optionally followed by a dash
    # separated suffix) is taken as the C++ flavour of the compiler
    cplusplus = re.compile(r"^(.+)(\+\+)(-.+|)$")

    executable = os.path.basename(command[0])
    if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
        return "c++" if cplusplus.match(executable) else "c"
    return None