//===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class for loading and validating a module map or
// header list by checking that all headers in the corresponding directories
// are accounted for.
//
//===----------------------------------------------------------------------===//
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Driver/Options.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Frontend/FrontendActions.h"
19 #include "CoverageChecker.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Support/FileUtilities.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "ModularizeUtilities.h"
27 using namespace clang
;
29 using namespace Modularize
;
32 // Subclass TargetOptions so we can construct it inline with
33 // the minimal option, the triple.
34 class ModuleMapTargetOptions
: public clang::TargetOptions
{
36 ModuleMapTargetOptions() { Triple
= llvm::sys::getDefaultTargetTriple(); }
40 // ModularizeUtilities class implementation.
43 ModularizeUtilities::ModularizeUtilities(std::vector
<std::string
> &InputPaths
,
44 llvm::StringRef Prefix
,
45 llvm::StringRef ProblemFilesListPath
)
46 : InputFilePaths(InputPaths
), HeaderPrefix(Prefix
),
47 ProblemFilesPath(ProblemFilesListPath
), HasModuleMap(false),
48 MissingHeaderCount(0),
49 // Init clang stuff needed for loading the module map and preprocessing.
50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51 DiagnosticOpts(new DiagnosticOptions()),
52 DC(llvm::errs(), DiagnosticOpts
.get()),
54 new DiagnosticsEngine(DiagIDs
, DiagnosticOpts
.get(), &DC
, false)),
55 TargetOpts(new ModuleMapTargetOptions()),
56 Target(TargetInfo::CreateTargetInfo(*Diagnostics
, TargetOpts
)),
57 FileMgr(new FileManager(FileSystemOpts
)),
58 SourceMgr(new SourceManager(*Diagnostics
, *FileMgr
, false)),
59 HeaderInfo(new HeaderSearch(std::make_shared
<HeaderSearchOptions
>(),
60 *SourceMgr
, *Diagnostics
, *LangOpts
,
63 // Create instance of ModularizeUtilities, to simplify setting up
64 // subordinate objects.
65 ModularizeUtilities
*ModularizeUtilities::createModularizeUtilities(
66 std::vector
<std::string
> &InputPaths
, llvm::StringRef Prefix
,
67 llvm::StringRef ProblemFilesListPath
) {
69 return new ModularizeUtilities(InputPaths
, Prefix
, ProblemFilesListPath
);
72 // Load all header lists and dependencies.
73 std::error_code
ModularizeUtilities::loadAllHeaderListsAndDependencies() {
74 // For each input file.
75 for (auto I
= InputFilePaths
.begin(), E
= InputFilePaths
.end(); I
!= E
; ++I
) {
76 llvm::StringRef InputPath
= *I
;
77 // If it's a module map.
78 if (InputPath
.ends_with(".modulemap")) {
79 // Load the module map.
80 if (std::error_code EC
= loadModuleMap(InputPath
))
83 // Else we assume it's a header list and load it.
84 if (std::error_code EC
= loadSingleHeaderListsAndDependencies(InputPath
)) {
85 errs() << "modularize: error: Unable to get header list '" << InputPath
86 << "': " << EC
.message() << '\n';
91 // If we have a problem files list.
92 if (ProblemFilesPath
.size() != 0) {
93 // Load problem files list.
94 if (std::error_code EC
= loadProblemHeaderList(ProblemFilesPath
)) {
95 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
96 << "': " << EC
.message() << '\n';
100 return std::error_code();
103 // Do coverage checks.
104 // For each loaded module map, do header coverage check.
105 // Starting from the directory of the module.modulemap file,
106 // Find all header files, optionally looking only at files
107 // covered by the include path options, and compare against
108 // the headers referenced by the module.modulemap file.
109 // Display warnings for unaccounted-for header files.
110 // Returns 0 if there were no errors or warnings, 1 if there
111 // were warnings, 2 if any other problem, such as a bad
112 // module map path argument was specified.
113 std::error_code
ModularizeUtilities::doCoverageCheck(
114 std::vector
<std::string
> &IncludePaths
,
115 llvm::ArrayRef
<std::string
> CommandLine
) {
116 int ModuleMapCount
= ModuleMaps
.size();
119 for (ModuleMapIndex
= 0; ModuleMapIndex
< ModuleMapCount
; ++ModuleMapIndex
) {
120 std::unique_ptr
<clang::ModuleMap
> &ModMap
= ModuleMaps
[ModuleMapIndex
];
121 auto Checker
= CoverageChecker::createCoverageChecker(
122 InputFilePaths
[ModuleMapIndex
], IncludePaths
, CommandLine
,
124 std::error_code LocalEC
= Checker
->doChecks();
125 if (LocalEC
.value() > 0)
131 // Load single header list and dependencies.
132 std::error_code
ModularizeUtilities::loadSingleHeaderListsAndDependencies(
133 llvm::StringRef InputPath
) {
135 // By default, use the path component of the list file name.
136 SmallString
<256> HeaderDirectory(InputPath
);
137 llvm::sys::path::remove_filename(HeaderDirectory
);
138 SmallString
<256> CurrentDirectory
;
139 llvm::sys::fs::current_path(CurrentDirectory
);
141 // Get the prefix if we have one.
142 if (HeaderPrefix
.size() != 0)
143 HeaderDirectory
= HeaderPrefix
;
145 // Read the header list file into a buffer.
146 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> listBuffer
=
147 MemoryBuffer::getFile(InputPath
);
148 if (std::error_code EC
= listBuffer
.getError())
151 // Parse the header list into strings.
152 SmallVector
<StringRef
, 32> Strings
;
153 listBuffer
.get()->getBuffer().split(Strings
, "\n", -1, false);
155 // Collect the header file names from the string list.
156 for (SmallVectorImpl
<StringRef
>::iterator I
= Strings
.begin(),
159 StringRef Line
= I
->trim();
160 // Ignore comments and empty lines.
161 if (Line
.empty() || (Line
[0] == '#'))
163 std::pair
<StringRef
, StringRef
> TargetAndDependents
= Line
.split(':');
164 SmallString
<256> HeaderFileName
;
165 // Prepend header file name prefix if it's not absolute.
166 if (llvm::sys::path::is_absolute(TargetAndDependents
.first
))
167 llvm::sys::path::native(TargetAndDependents
.first
, HeaderFileName
);
169 if (HeaderDirectory
.size() != 0)
170 HeaderFileName
= HeaderDirectory
;
172 HeaderFileName
= CurrentDirectory
;
173 llvm::sys::path::append(HeaderFileName
, TargetAndDependents
.first
);
174 llvm::sys::path::native(HeaderFileName
);
176 // Handle optional dependencies.
177 DependentsVector Dependents
;
178 SmallVector
<StringRef
, 4> DependentsList
;
179 TargetAndDependents
.second
.split(DependentsList
, " ", -1, false);
180 int Count
= DependentsList
.size();
181 for (int Index
= 0; Index
< Count
; ++Index
) {
182 SmallString
<256> Dependent
;
183 if (llvm::sys::path::is_absolute(DependentsList
[Index
]))
184 Dependent
= DependentsList
[Index
];
186 if (HeaderDirectory
.size() != 0)
187 Dependent
= HeaderDirectory
;
189 Dependent
= CurrentDirectory
;
190 llvm::sys::path::append(Dependent
, DependentsList
[Index
]);
192 llvm::sys::path::native(Dependent
);
193 Dependents
.push_back(getCanonicalPath(Dependent
.str()));
195 // Get canonical form.
196 HeaderFileName
= getCanonicalPath(HeaderFileName
);
197 // Save the resulting header file path and dependencies.
198 HeaderFileNames
.push_back(std::string(HeaderFileName
));
199 Dependencies
[HeaderFileName
.str()] = Dependents
;
201 return std::error_code();
204 // Load problem header list.
205 std::error_code
ModularizeUtilities::loadProblemHeaderList(
206 llvm::StringRef InputPath
) {
208 // By default, use the path component of the list file name.
209 SmallString
<256> HeaderDirectory(InputPath
);
210 llvm::sys::path::remove_filename(HeaderDirectory
);
211 SmallString
<256> CurrentDirectory
;
212 llvm::sys::fs::current_path(CurrentDirectory
);
214 // Get the prefix if we have one.
215 if (HeaderPrefix
.size() != 0)
216 HeaderDirectory
= HeaderPrefix
;
218 // Read the header list file into a buffer.
219 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> listBuffer
=
220 MemoryBuffer::getFile(InputPath
);
221 if (std::error_code EC
= listBuffer
.getError())
224 // Parse the header list into strings.
225 SmallVector
<StringRef
, 32> Strings
;
226 listBuffer
.get()->getBuffer().split(Strings
, "\n", -1, false);
228 // Collect the header file names from the string list.
229 for (SmallVectorImpl
<StringRef
>::iterator I
= Strings
.begin(),
232 StringRef Line
= I
->trim();
233 // Ignore comments and empty lines.
234 if (Line
.empty() || (Line
[0] == '#'))
236 SmallString
<256> HeaderFileName
;
237 // Prepend header file name prefix if it's not absolute.
238 if (llvm::sys::path::is_absolute(Line
))
239 llvm::sys::path::native(Line
, HeaderFileName
);
241 if (HeaderDirectory
.size() != 0)
242 HeaderFileName
= HeaderDirectory
;
244 HeaderFileName
= CurrentDirectory
;
245 llvm::sys::path::append(HeaderFileName
, Line
);
246 llvm::sys::path::native(HeaderFileName
);
248 // Get canonical form.
249 HeaderFileName
= getCanonicalPath(HeaderFileName
);
250 // Save the resulting header file path.
251 ProblemFileNames
.push_back(std::string(HeaderFileName
));
253 return std::error_code();
256 // Load single module map and extract header file list.
257 std::error_code
ModularizeUtilities::loadModuleMap(
258 llvm::StringRef InputPath
) {
259 // Get file entry for module.modulemap file.
260 auto ModuleMapEntryOrErr
= SourceMgr
->getFileManager().getFileRef(InputPath
);
262 // return error if not found.
263 if (!ModuleMapEntryOrErr
) {
264 llvm::errs() << "error: File \"" << InputPath
<< "\" not found.\n";
265 return errorToErrorCode(ModuleMapEntryOrErr
.takeError());
267 FileEntryRef ModuleMapEntry
= *ModuleMapEntryOrErr
;
269 // Because the module map parser uses a ForwardingDiagnosticConsumer,
270 // which doesn't forward the BeginSourceFile call, we do it explicitly here.
271 DC
.BeginSourceFile(*LangOpts
, nullptr);
273 // Figure out the home directory for the module map file.
274 DirectoryEntryRef Dir
= ModuleMapEntry
.getDir();
275 StringRef
DirName(Dir
.getName());
276 if (llvm::sys::path::filename(DirName
) == "Modules") {
277 DirName
= llvm::sys::path::parent_path(DirName
);
278 if (DirName
.ends_with(".framework")) {
279 auto FrameworkDirOrErr
= FileMgr
->getDirectoryRef(DirName
);
280 if (!FrameworkDirOrErr
) {
281 // This can happen if there's a race between the above check and the
282 // removal of the directory.
283 return errorToErrorCode(FrameworkDirOrErr
.takeError());
285 Dir
= *FrameworkDirOrErr
;
289 std::unique_ptr
<ModuleMap
> ModMap
;
290 ModMap
.reset(new ModuleMap(*SourceMgr
, *Diagnostics
, *LangOpts
,
291 Target
.get(), *HeaderInfo
));
293 // Parse module.modulemap file into module map.
294 if (ModMap
->parseModuleMapFile(ModuleMapEntry
, false, Dir
)) {
295 return std::error_code(1, std::generic_category());
298 // Do matching end call.
301 // Reset missing header count.
302 MissingHeaderCount
= 0;
304 if (!collectModuleMapHeaders(ModMap
.get()))
305 return std::error_code(1, std::generic_category());
308 ModuleMaps
.push_back(std::move(ModMap
));
310 // Indicate we are using module maps.
313 // Return code of 1 for missing headers.
314 if (MissingHeaderCount
)
315 return std::error_code(1, std::generic_category());
317 return std::error_code();
320 // Collect module map headers.
321 // Walks the modules and collects referenced headers into
323 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap
*ModMap
) {
324 SmallVector
<std::pair
<StringRef
, const clang::Module
*>, 0> Vec
;
325 for (auto &M
: ModMap
->modules())
326 Vec
.emplace_back(M
.first(), M
.second
);
327 llvm::sort(Vec
, llvm::less_first());
329 if (!collectModuleHeaders(*I
.second
))
334 // Collect referenced headers from one module.
335 // Collects the headers referenced in the given module into
337 bool ModularizeUtilities::collectModuleHeaders(const clang::Module
&Mod
) {
339 // Ignore explicit modules because they often have dependencies
344 // Treat headers in umbrella directory as dependencies.
345 DependentsVector UmbrellaDependents
;
347 // Recursively do submodules.
348 for (auto *Submodule
: Mod
.submodules())
349 collectModuleHeaders(*Submodule
);
351 if (std::optional
<clang::Module::Header
> UmbrellaHeader
=
352 Mod
.getUmbrellaHeaderAsWritten()) {
353 std::string HeaderPath
= getCanonicalPath(UmbrellaHeader
->Entry
.getName());
354 // Collect umbrella header.
355 HeaderFileNames
.push_back(HeaderPath
);
357 // FUTURE: When needed, umbrella header header collection goes here.
358 } else if (std::optional
<clang::Module::DirectoryName
> UmbrellaDir
=
359 Mod
.getUmbrellaDirAsWritten()) {
360 // If there normal headers, assume these are umbrellas and skip collection.
361 if (Mod
.getHeaders(Module::HK_Normal
).empty()) {
362 // Collect headers in umbrella directory.
363 if (!collectUmbrellaHeaders(UmbrellaDir
->Entry
.getName(),
369 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
370 // assuming they are marked as such either because of unsuitability for
371 // modules or because they are meant to be included by another header,
372 // and thus should be ignored by modularize.
374 for (const auto &Header
: Mod
.getHeaders(clang::Module::HK_Normal
))
375 HeaderFileNames
.push_back(getCanonicalPath(Header
.Entry
.getName()));
377 int MissingCountThisModule
= Mod
.MissingHeaders
.size();
379 for (int Index
= 0; Index
< MissingCountThisModule
; ++Index
) {
380 std::string MissingFile
= Mod
.MissingHeaders
[Index
].FileName
;
381 SourceLocation Loc
= Mod
.MissingHeaders
[Index
].FileNameLoc
;
382 errs() << Loc
.printToString(*SourceMgr
)
383 << ": error : Header not found: " << MissingFile
<< "\n";
386 MissingHeaderCount
+= MissingCountThisModule
;
391 // Collect headers from an umbrella directory.
392 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName
,
393 DependentsVector
&Dependents
) {
394 // Initialize directory name.
395 SmallString
<256> Directory(UmbrellaDirName
);
396 // Walk the directory.
398 for (llvm::sys::fs::directory_iterator
I(Directory
.str(), EC
), E
; I
!= E
;
402 std::string
File(I
->path());
403 llvm::ErrorOr
<llvm::sys::fs::basic_file_status
> Status
= I
->status();
406 llvm::sys::fs::file_type Type
= Status
->type();
407 // If the file is a directory, ignore the name and recurse.
408 if (Type
== llvm::sys::fs::file_type::directory_file
) {
409 if (!collectUmbrellaHeaders(File
, Dependents
))
413 // If the file does not have a common header extension, ignore it.
417 std::string HeaderPath
= getCanonicalPath(File
);
418 Dependents
.push_back(HeaderPath
);
423 // Replace .. embedded in path for purposes of having
425 static std::string
replaceDotDot(StringRef Path
) {
426 SmallString
<128> Buffer
;
427 llvm::sys::path::const_iterator B
= llvm::sys::path::begin(Path
),
428 E
= llvm::sys::path::end(Path
);
431 llvm::sys::path::remove_filename(Buffer
);
433 llvm::sys::path::append(Buffer
, *B
);
436 if (Path
.ends_with("/") || Path
.ends_with("\\"))
437 Buffer
.append(1, Path
.back());
438 return Buffer
.c_str();
441 // Convert header path to canonical form.
442 // The canonical form is basically just use forward slashes, and remove "./".
443 // \param FilePath The file path, relative to the module map directory.
444 // \returns The file path in canonical form.
445 std::string
ModularizeUtilities::getCanonicalPath(StringRef FilePath
) {
446 std::string
Tmp(replaceDotDot(FilePath
));
447 std::replace(Tmp
.begin(), Tmp
.end(), '\\', '/');
449 if (Tmp2
.starts_with("./"))
450 Tmp
= std::string(Tmp2
.substr(2));
454 // Check for header file extension.
455 // If the file extension is .h, .inc, or missing, it's
456 // assumed to be a header.
457 // \param FileName The file name. Must not be a directory.
458 // \returns true if it has a header extension or no extension.
459 bool ModularizeUtilities::isHeader(StringRef FileName
) {
460 StringRef Extension
= llvm::sys::path::extension(FileName
);
461 if (Extension
.size() == 0)
463 if (Extension
.equals_insensitive(".h"))
465 if (Extension
.equals_insensitive(".inc"))
470 // Get directory path component from file path.
471 // \returns the component of the given path, which will be
472 // relative if the given path is relative, absolute if the
473 // given path is absolute, or "." if the path has no leading
475 std::string
ModularizeUtilities::getDirectoryFromPath(StringRef Path
) {
476 SmallString
<256> Directory(Path
);
477 sys::path::remove_filename(Directory
);
478 if (Directory
.size() == 0)
480 return std::string(Directory
);
483 // Add unique problem file.
484 // Also standardizes the path.
485 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath
) {
486 FilePath
= getCanonicalPath(FilePath
);
487 // Don't add if already present.
488 for(auto &TestFilePath
: ProblemFileNames
) {
489 if (TestFilePath
== FilePath
)
492 ProblemFileNames
.push_back(FilePath
);
495 // Add file with no compile errors.
496 // Also standardizes the path.
497 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath
) {
498 FilePath
= getCanonicalPath(FilePath
);
499 GoodFileNames
.push_back(FilePath
);
502 // List problem files.
503 void ModularizeUtilities::displayProblemFiles() {
504 errs() << "\nThese are the files with possible errors:\n\n";
505 for (auto &ProblemFile
: ProblemFileNames
) {
506 errs() << ProblemFile
<< "\n";
510 // List files with no problems.
511 void ModularizeUtilities::displayGoodFiles() {
512 errs() << "\nThese are the files with no detected errors:\n\n";
513 for (auto &GoodFile
: HeaderFileNames
) {
515 for (auto &ProblemFile
: ProblemFileNames
) {
516 if (ProblemFile
== GoodFile
) {
522 errs() << GoodFile
<< "\n";
526 // List files with problem files commented out.
527 void ModularizeUtilities::displayCombinedFiles() {
529 "\nThese are the combined files, with problem files preceded by #:\n\n";
530 for (auto &File
: HeaderFileNames
) {
532 for (auto &ProblemFile
: ProblemFileNames
) {
533 if (ProblemFile
== File
) {
538 errs() << (Good
? "" : "#") << File
<< "\n";