//===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class for loading and validating a module map or
// header list by checking that all headers in the corresponding directories
// are accounted for.
//
//===----------------------------------------------------------------------===//

#include "clang/Basic/SourceManager.h"
#include "clang/Driver/Options.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendActions.h"
#include "CoverageChecker.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "ModularizeUtilities.h"
#include <algorithm>
#include <memory>

27 using namespace clang
;
29 using namespace Modularize
;
32 // Subclass TargetOptions so we can construct it inline with
33 // the minimal option, the triple.
34 class ModuleMapTargetOptions
: public clang::TargetOptions
{
36 ModuleMapTargetOptions() { Triple
= llvm::sys::getDefaultTargetTriple(); }
40 // ModularizeUtilities class implementation.
43 ModularizeUtilities::ModularizeUtilities(std::vector
<std::string
> &InputPaths
,
44 llvm::StringRef Prefix
,
45 llvm::StringRef ProblemFilesListPath
)
46 : InputFilePaths(InputPaths
), HeaderPrefix(Prefix
),
47 ProblemFilesPath(ProblemFilesListPath
), HasModuleMap(false),
48 MissingHeaderCount(0),
49 // Init clang stuff needed for loading the module map and preprocessing.
50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51 DiagnosticOpts(new DiagnosticOptions()),
52 DC(llvm::errs(), DiagnosticOpts
.get()),
54 new DiagnosticsEngine(DiagIDs
, DiagnosticOpts
.get(), &DC
, false)),
55 TargetOpts(new ModuleMapTargetOptions()),
56 Target(TargetInfo::CreateTargetInfo(*Diagnostics
, TargetOpts
)),
57 FileMgr(new FileManager(FileSystemOpts
)),
58 SourceMgr(new SourceManager(*Diagnostics
, *FileMgr
, false)),
59 HeaderInfo(new HeaderSearch(std::make_shared
<HeaderSearchOptions
>(),
60 *SourceMgr
, *Diagnostics
, *LangOpts
,
63 // Create instance of ModularizeUtilities, to simplify setting up
64 // subordinate objects.
65 ModularizeUtilities
*ModularizeUtilities::createModularizeUtilities(
66 std::vector
<std::string
> &InputPaths
, llvm::StringRef Prefix
,
67 llvm::StringRef ProblemFilesListPath
) {
69 return new ModularizeUtilities(InputPaths
, Prefix
, ProblemFilesListPath
);
72 // Load all header lists and dependencies.
73 std::error_code
ModularizeUtilities::loadAllHeaderListsAndDependencies() {
74 // For each input file.
75 for (auto I
= InputFilePaths
.begin(), E
= InputFilePaths
.end(); I
!= E
; ++I
) {
76 llvm::StringRef InputPath
= *I
;
77 // If it's a module map.
78 if (InputPath
.endswith(".modulemap")) {
79 // Load the module map.
80 if (std::error_code EC
= loadModuleMap(InputPath
))
84 // Else we assume it's a header list and load it.
85 if (std::error_code EC
= loadSingleHeaderListsAndDependencies(InputPath
)) {
86 errs() << "modularize: error: Unable to get header list '" << InputPath
87 << "': " << EC
.message() << '\n';
92 // If we have a problem files list.
93 if (ProblemFilesPath
.size() != 0) {
94 // Load problem files list.
95 if (std::error_code EC
= loadProblemHeaderList(ProblemFilesPath
)) {
96 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
97 << "': " << EC
.message() << '\n';
101 return std::error_code();
104 // Do coverage checks.
105 // For each loaded module map, do header coverage check.
106 // Starting from the directory of the module.map file,
107 // Find all header files, optionally looking only at files
108 // covered by the include path options, and compare against
109 // the headers referenced by the module.map file.
110 // Display warnings for unaccounted-for header files.
111 // Returns 0 if there were no errors or warnings, 1 if there
112 // were warnings, 2 if any other problem, such as a bad
113 // module map path argument was specified.
114 std::error_code
ModularizeUtilities::doCoverageCheck(
115 std::vector
<std::string
> &IncludePaths
,
116 llvm::ArrayRef
<std::string
> CommandLine
) {
117 int ModuleMapCount
= ModuleMaps
.size();
120 for (ModuleMapIndex
= 0; ModuleMapIndex
< ModuleMapCount
; ++ModuleMapIndex
) {
121 std::unique_ptr
<clang::ModuleMap
> &ModMap
= ModuleMaps
[ModuleMapIndex
];
122 auto Checker
= CoverageChecker::createCoverageChecker(
123 InputFilePaths
[ModuleMapIndex
], IncludePaths
, CommandLine
,
125 std::error_code LocalEC
= Checker
->doChecks();
126 if (LocalEC
.value() > 0)
132 // Load single header list and dependencies.
133 std::error_code
ModularizeUtilities::loadSingleHeaderListsAndDependencies(
134 llvm::StringRef InputPath
) {
136 // By default, use the path component of the list file name.
137 SmallString
<256> HeaderDirectory(InputPath
);
138 llvm::sys::path::remove_filename(HeaderDirectory
);
139 SmallString
<256> CurrentDirectory
;
140 llvm::sys::fs::current_path(CurrentDirectory
);
142 // Get the prefix if we have one.
143 if (HeaderPrefix
.size() != 0)
144 HeaderDirectory
= HeaderPrefix
;
146 // Read the header list file into a buffer.
147 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> listBuffer
=
148 MemoryBuffer::getFile(InputPath
);
149 if (std::error_code EC
= listBuffer
.getError())
152 // Parse the header list into strings.
153 SmallVector
<StringRef
, 32> Strings
;
154 listBuffer
.get()->getBuffer().split(Strings
, "\n", -1, false);
156 // Collect the header file names from the string list.
157 for (SmallVectorImpl
<StringRef
>::iterator I
= Strings
.begin(),
160 StringRef Line
= I
->trim();
161 // Ignore comments and empty lines.
162 if (Line
.empty() || (Line
[0] == '#'))
164 std::pair
<StringRef
, StringRef
> TargetAndDependents
= Line
.split(':');
165 SmallString
<256> HeaderFileName
;
166 // Prepend header file name prefix if it's not absolute.
167 if (llvm::sys::path::is_absolute(TargetAndDependents
.first
))
168 llvm::sys::path::native(TargetAndDependents
.first
, HeaderFileName
);
170 if (HeaderDirectory
.size() != 0)
171 HeaderFileName
= HeaderDirectory
;
173 HeaderFileName
= CurrentDirectory
;
174 llvm::sys::path::append(HeaderFileName
, TargetAndDependents
.first
);
175 llvm::sys::path::native(HeaderFileName
);
177 // Handle optional dependencies.
178 DependentsVector Dependents
;
179 SmallVector
<StringRef
, 4> DependentsList
;
180 TargetAndDependents
.second
.split(DependentsList
, " ", -1, false);
181 int Count
= DependentsList
.size();
182 for (int Index
= 0; Index
< Count
; ++Index
) {
183 SmallString
<256> Dependent
;
184 if (llvm::sys::path::is_absolute(DependentsList
[Index
]))
185 Dependent
= DependentsList
[Index
];
187 if (HeaderDirectory
.size() != 0)
188 Dependent
= HeaderDirectory
;
190 Dependent
= CurrentDirectory
;
191 llvm::sys::path::append(Dependent
, DependentsList
[Index
]);
193 llvm::sys::path::native(Dependent
);
194 Dependents
.push_back(getCanonicalPath(Dependent
.str()));
196 // Get canonical form.
197 HeaderFileName
= getCanonicalPath(HeaderFileName
);
198 // Save the resulting header file path and dependencies.
199 HeaderFileNames
.push_back(std::string(HeaderFileName
.str()));
200 Dependencies
[HeaderFileName
.str()] = Dependents
;
202 return std::error_code();
205 // Load problem header list.
206 std::error_code
ModularizeUtilities::loadProblemHeaderList(
207 llvm::StringRef InputPath
) {
209 // By default, use the path component of the list file name.
210 SmallString
<256> HeaderDirectory(InputPath
);
211 llvm::sys::path::remove_filename(HeaderDirectory
);
212 SmallString
<256> CurrentDirectory
;
213 llvm::sys::fs::current_path(CurrentDirectory
);
215 // Get the prefix if we have one.
216 if (HeaderPrefix
.size() != 0)
217 HeaderDirectory
= HeaderPrefix
;
219 // Read the header list file into a buffer.
220 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> listBuffer
=
221 MemoryBuffer::getFile(InputPath
);
222 if (std::error_code EC
= listBuffer
.getError())
225 // Parse the header list into strings.
226 SmallVector
<StringRef
, 32> Strings
;
227 listBuffer
.get()->getBuffer().split(Strings
, "\n", -1, false);
229 // Collect the header file names from the string list.
230 for (SmallVectorImpl
<StringRef
>::iterator I
= Strings
.begin(),
233 StringRef Line
= I
->trim();
234 // Ignore comments and empty lines.
235 if (Line
.empty() || (Line
[0] == '#'))
237 SmallString
<256> HeaderFileName
;
238 // Prepend header file name prefix if it's not absolute.
239 if (llvm::sys::path::is_absolute(Line
))
240 llvm::sys::path::native(Line
, HeaderFileName
);
242 if (HeaderDirectory
.size() != 0)
243 HeaderFileName
= HeaderDirectory
;
245 HeaderFileName
= CurrentDirectory
;
246 llvm::sys::path::append(HeaderFileName
, Line
);
247 llvm::sys::path::native(HeaderFileName
);
249 // Get canonical form.
250 HeaderFileName
= getCanonicalPath(HeaderFileName
);
251 // Save the resulting header file path.
252 ProblemFileNames
.push_back(std::string(HeaderFileName
.str()));
254 return std::error_code();
257 // Load single module map and extract header file list.
258 std::error_code
ModularizeUtilities::loadModuleMap(
259 llvm::StringRef InputPath
) {
260 // Get file entry for module.modulemap file.
261 auto ModuleMapEntryOrErr
= SourceMgr
->getFileManager().getFileRef(InputPath
);
263 // return error if not found.
264 if (!ModuleMapEntryOrErr
) {
265 llvm::errs() << "error: File \"" << InputPath
<< "\" not found.\n";
266 return errorToErrorCode(ModuleMapEntryOrErr
.takeError());
268 FileEntryRef ModuleMapEntry
= *ModuleMapEntryOrErr
;
270 // Because the module map parser uses a ForwardingDiagnosticConsumer,
271 // which doesn't forward the BeginSourceFile call, we do it explicitly here.
272 DC
.BeginSourceFile(*LangOpts
, nullptr);
274 // Figure out the home directory for the module map file.
275 DirectoryEntryRef Dir
= ModuleMapEntry
.getDir();
276 StringRef
DirName(Dir
.getName());
277 if (llvm::sys::path::filename(DirName
) == "Modules") {
278 DirName
= llvm::sys::path::parent_path(DirName
);
279 if (DirName
.endswith(".framework")) {
280 auto FrameworkDirOrErr
= FileMgr
->getDirectoryRef(DirName
);
281 if (!FrameworkDirOrErr
) {
282 // This can happen if there's a race between the above check and the
283 // removal of the directory.
284 return errorToErrorCode(FrameworkDirOrErr
.takeError());
286 Dir
= *FrameworkDirOrErr
;
290 std::unique_ptr
<ModuleMap
> ModMap
;
291 ModMap
.reset(new ModuleMap(*SourceMgr
, *Diagnostics
, *LangOpts
,
292 Target
.get(), *HeaderInfo
));
294 // Parse module.modulemap file into module map.
295 if (ModMap
->parseModuleMapFile(ModuleMapEntry
, false, Dir
)) {
296 return std::error_code(1, std::generic_category());
299 // Do matching end call.
302 // Reset missing header count.
303 MissingHeaderCount
= 0;
305 if (!collectModuleMapHeaders(ModMap
.get()))
306 return std::error_code(1, std::generic_category());
309 ModuleMaps
.push_back(std::move(ModMap
));
311 // Indicate we are using module maps.
314 // Return code of 1 for missing headers.
315 if (MissingHeaderCount
)
316 return std::error_code(1, std::generic_category());
318 return std::error_code();
321 // Collect module map headers.
322 // Walks the modules and collects referenced headers into
324 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap
*ModMap
) {
325 SmallVector
<std::pair
<StringRef
, const clang::Module
*>, 0> Vec
;
326 for (auto &M
: ModMap
->modules())
327 Vec
.emplace_back(M
.first(), M
.second
);
328 llvm::sort(Vec
, llvm::less_first());
330 if (!collectModuleHeaders(*I
.second
))
335 // Collect referenced headers from one module.
336 // Collects the headers referenced in the given module into
338 bool ModularizeUtilities::collectModuleHeaders(const clang::Module
&Mod
) {
340 // Ignore explicit modules because they often have dependencies
345 // Treat headers in umbrella directory as dependencies.
346 DependentsVector UmbrellaDependents
;
348 // Recursively do submodules.
349 for (auto *Submodule
: Mod
.submodules())
350 collectModuleHeaders(*Submodule
);
352 if (std::optional
<clang::Module::Header
> UmbrellaHeader
=
353 Mod
.getUmbrellaHeaderAsWritten()) {
354 std::string HeaderPath
= getCanonicalPath(UmbrellaHeader
->Entry
.getName());
355 // Collect umbrella header.
356 HeaderFileNames
.push_back(HeaderPath
);
358 // FUTURE: When needed, umbrella header header collection goes here.
359 } else if (std::optional
<clang::Module::DirectoryName
> UmbrellaDir
=
360 Mod
.getUmbrellaDirAsWritten()) {
361 // If there normal headers, assume these are umbrellas and skip collection.
362 if (Mod
.Headers
->size() == 0) {
363 // Collect headers in umbrella directory.
364 if (!collectUmbrellaHeaders(UmbrellaDir
->Entry
.getName(),
370 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
371 // assuming they are marked as such either because of unsuitability for
372 // modules or because they are meant to be included by another header,
373 // and thus should be ignored by modularize.
375 int NormalHeaderCount
= Mod
.Headers
[clang::Module::HK_Normal
].size();
377 for (int Index
= 0; Index
< NormalHeaderCount
; ++Index
) {
378 DependentsVector NormalDependents
;
379 // Collect normal header.
380 const clang::Module::Header
&Header(
381 Mod
.Headers
[clang::Module::HK_Normal
][Index
]);
382 std::string HeaderPath
= getCanonicalPath(Header
.Entry
.getName());
383 HeaderFileNames
.push_back(HeaderPath
);
386 int MissingCountThisModule
= Mod
.MissingHeaders
.size();
388 for (int Index
= 0; Index
< MissingCountThisModule
; ++Index
) {
389 std::string MissingFile
= Mod
.MissingHeaders
[Index
].FileName
;
390 SourceLocation Loc
= Mod
.MissingHeaders
[Index
].FileNameLoc
;
391 errs() << Loc
.printToString(*SourceMgr
)
392 << ": error : Header not found: " << MissingFile
<< "\n";
395 MissingHeaderCount
+= MissingCountThisModule
;
400 // Collect headers from an umbrella directory.
401 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName
,
402 DependentsVector
&Dependents
) {
403 // Initialize directory name.
404 SmallString
<256> Directory(UmbrellaDirName
);
405 // Walk the directory.
407 for (llvm::sys::fs::directory_iterator
I(Directory
.str(), EC
), E
; I
!= E
;
411 std::string
File(I
->path());
412 llvm::ErrorOr
<llvm::sys::fs::basic_file_status
> Status
= I
->status();
415 llvm::sys::fs::file_type Type
= Status
->type();
416 // If the file is a directory, ignore the name and recurse.
417 if (Type
== llvm::sys::fs::file_type::directory_file
) {
418 if (!collectUmbrellaHeaders(File
, Dependents
))
422 // If the file does not have a common header extension, ignore it.
426 std::string HeaderPath
= getCanonicalPath(File
);
427 Dependents
.push_back(HeaderPath
);
432 // Replace .. embedded in path for purposes of having
434 static std::string
replaceDotDot(StringRef Path
) {
435 SmallString
<128> Buffer
;
436 llvm::sys::path::const_iterator B
= llvm::sys::path::begin(Path
),
437 E
= llvm::sys::path::end(Path
);
439 if (B
->compare(".") == 0) {
441 else if (B
->compare("..") == 0)
442 llvm::sys::path::remove_filename(Buffer
);
444 llvm::sys::path::append(Buffer
, *B
);
447 if (Path
.endswith("/") || Path
.endswith("\\"))
448 Buffer
.append(1, Path
.back());
449 return Buffer
.c_str();
452 // Convert header path to canonical form.
453 // The canonical form is basically just use forward slashes, and remove "./".
454 // \param FilePath The file path, relative to the module map directory.
455 // \returns The file path in canonical form.
456 std::string
ModularizeUtilities::getCanonicalPath(StringRef FilePath
) {
457 std::string
Tmp(replaceDotDot(FilePath
));
458 std::replace(Tmp
.begin(), Tmp
.end(), '\\', '/');
460 if (Tmp2
.startswith("./"))
461 Tmp
= std::string(Tmp2
.substr(2));
465 // Check for header file extension.
466 // If the file extension is .h, .inc, or missing, it's
467 // assumed to be a header.
468 // \param FileName The file name. Must not be a directory.
469 // \returns true if it has a header extension or no extension.
470 bool ModularizeUtilities::isHeader(StringRef FileName
) {
471 StringRef Extension
= llvm::sys::path::extension(FileName
);
472 if (Extension
.size() == 0)
474 if (Extension
.equals_insensitive(".h"))
476 if (Extension
.equals_insensitive(".inc"))
481 // Get directory path component from file path.
482 // \returns the component of the given path, which will be
483 // relative if the given path is relative, absolute if the
484 // given path is absolute, or "." if the path has no leading
486 std::string
ModularizeUtilities::getDirectoryFromPath(StringRef Path
) {
487 SmallString
<256> Directory(Path
);
488 sys::path::remove_filename(Directory
);
489 if (Directory
.size() == 0)
491 return std::string(Directory
.str());
494 // Add unique problem file.
495 // Also standardizes the path.
496 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath
) {
497 FilePath
= getCanonicalPath(FilePath
);
498 // Don't add if already present.
499 for(auto &TestFilePath
: ProblemFileNames
) {
500 if (TestFilePath
== FilePath
)
503 ProblemFileNames
.push_back(FilePath
);
506 // Add file with no compile errors.
507 // Also standardizes the path.
508 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath
) {
509 FilePath
= getCanonicalPath(FilePath
);
510 GoodFileNames
.push_back(FilePath
);
513 // List problem files.
514 void ModularizeUtilities::displayProblemFiles() {
515 errs() << "\nThese are the files with possible errors:\n\n";
516 for (auto &ProblemFile
: ProblemFileNames
) {
517 errs() << ProblemFile
<< "\n";
521 // List files with no problems.
522 void ModularizeUtilities::displayGoodFiles() {
523 errs() << "\nThese are the files with no detected errors:\n\n";
524 for (auto &GoodFile
: HeaderFileNames
) {
526 for (auto &ProblemFile
: ProblemFileNames
) {
527 if (ProblemFile
== GoodFile
) {
533 errs() << GoodFile
<< "\n";
537 // List files with problem files commented out.
538 void ModularizeUtilities::displayCombinedFiles() {
540 "\nThese are the combined files, with problem files preceded by #:\n\n";
541 for (auto &File
: HeaderFileNames
) {
543 for (auto &ProblemFile
: ProblemFileNames
) {
544 if (ProblemFile
== File
) {
549 errs() << (Good
? "" : "#") << File
<< "\n";