//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/Threading.h"
#include <mutex>
#include <optional>
15 using namespace clang
;
16 using namespace tooling
;
17 using namespace dependencies
;
19 llvm::ErrorOr
<DependencyScanningWorkerFilesystem::TentativeEntry
>
20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename
) {
21 // Load the file and its content from the file system.
22 auto MaybeFile
= getUnderlyingFS().openFileForRead(Filename
);
24 return MaybeFile
.getError();
25 auto File
= std::move(*MaybeFile
);
27 auto MaybeStat
= File
->status();
29 return MaybeStat
.getError();
30 auto Stat
= std::move(*MaybeStat
);
32 auto MaybeBuffer
= File
->getBuffer(Stat
.getName());
34 return MaybeBuffer
.getError();
35 auto Buffer
= std::move(*MaybeBuffer
);
37 // If the file size changed between read and stat, pretend it didn't.
38 if (Stat
.getSize() != Buffer
->getBufferSize())
39 Stat
= llvm::vfs::Status::copyWithNewSize(Stat
, Buffer
->getBufferSize());
41 return TentativeEntry(Stat
, std::move(Buffer
));
44 EntryRef
DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
45 const CachedFileSystemEntry
&Entry
, StringRef Filename
, bool Disable
) {
46 if (Entry
.isError() || Entry
.isDirectory() || Disable
||
47 !shouldScanForDirectives(Filename
))
48 return EntryRef(Filename
, Entry
);
50 CachedFileContents
*Contents
= Entry
.getCachedContents();
51 assert(Contents
&& "contents not initialized");
53 // Double-checked locking.
54 if (Contents
->DepDirectives
.load())
55 return EntryRef(Filename
, Entry
);
57 std::lock_guard
<std::mutex
> GuardLock(Contents
->ValueLock
);
59 // Double-checked locking.
60 if (Contents
->DepDirectives
.load())
61 return EntryRef(Filename
, Entry
);
63 SmallVector
<dependency_directives_scan::Directive
, 64> Directives
;
64 // Scan the file for preprocessor directives that might affect the
66 if (scanSourceForDependencyDirectives(Contents
->Original
->getBuffer(),
67 Contents
->DepDirectiveTokens
,
69 Contents
->DepDirectiveTokens
.clear();
70 // FIXME: Propagate the diagnostic if desired by the client.
71 Contents
->DepDirectives
.store(new std::optional
<DependencyDirectivesTy
>());
72 return EntryRef(Filename
, Entry
);
75 // This function performed double-checked locking using `DepDirectives`.
76 // Assigning it must be the last thing this function does, otherwise other
77 // threads may skip the
78 // critical section (`DepDirectives != nullptr`), leading to a data race.
79 Contents
->DepDirectives
.store(
80 new std::optional
<DependencyDirectivesTy
>(std::move(Directives
)));
81 return EntryRef(Filename
, Entry
);
84 DependencyScanningFilesystemSharedCache::
85 DependencyScanningFilesystemSharedCache() {
86 // This heuristic was chosen using a empirical testing on a
87 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88 // sharding gives a performance edge by reducing the lock contention.
89 // FIXME: A better heuristic might also consider the OS to account for
90 // the different cost of lock contention on different OSes.
92 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93 CacheShards
= std::make_unique
<CacheShard
[]>(NumShards
);
96 DependencyScanningFilesystemSharedCache::CacheShard
&
97 DependencyScanningFilesystemSharedCache::getShardForFilename(
98 StringRef Filename
) const {
99 assert(llvm::sys::path::is_absolute_gnu(Filename
));
100 return CacheShards
[llvm::hash_value(Filename
) % NumShards
];
103 DependencyScanningFilesystemSharedCache::CacheShard
&
104 DependencyScanningFilesystemSharedCache::getShardForUID(
105 llvm::sys::fs::UniqueID UID
) const {
106 auto Hash
= llvm::hash_combine(UID
.getDevice(), UID
.getFile());
107 return CacheShards
[Hash
% NumShards
];
110 const CachedFileSystemEntry
*
111 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
112 StringRef Filename
) const {
113 assert(llvm::sys::path::is_absolute_gnu(Filename
));
114 std::lock_guard
<std::mutex
> LockGuard(CacheLock
);
115 auto It
= EntriesByFilename
.find(Filename
);
116 return It
== EntriesByFilename
.end() ? nullptr : It
->getValue();
119 const CachedFileSystemEntry
*
120 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
121 llvm::sys::fs::UniqueID UID
) const {
122 std::lock_guard
<std::mutex
> LockGuard(CacheLock
);
123 auto It
= EntriesByUID
.find(UID
);
124 return It
== EntriesByUID
.end() ? nullptr : It
->getSecond();
127 const CachedFileSystemEntry
&
128 DependencyScanningFilesystemSharedCache::CacheShard::
129 getOrEmplaceEntryForFilename(StringRef Filename
,
130 llvm::ErrorOr
<llvm::vfs::Status
> Stat
) {
131 std::lock_guard
<std::mutex
> LockGuard(CacheLock
);
132 auto Insertion
= EntriesByFilename
.insert({Filename
, nullptr});
133 if (Insertion
.second
)
134 Insertion
.first
->second
=
135 new (EntryStorage
.Allocate()) CachedFileSystemEntry(std::move(Stat
));
136 return *Insertion
.first
->second
;
139 const CachedFileSystemEntry
&
140 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
141 llvm::sys::fs::UniqueID UID
, llvm::vfs::Status Stat
,
142 std::unique_ptr
<llvm::MemoryBuffer
> Contents
) {
143 std::lock_guard
<std::mutex
> LockGuard(CacheLock
);
144 auto Insertion
= EntriesByUID
.insert({UID
, nullptr});
145 if (Insertion
.second
) {
146 CachedFileContents
*StoredContents
= nullptr;
148 StoredContents
= new (ContentsStorage
.Allocate())
149 CachedFileContents(std::move(Contents
));
150 Insertion
.first
->second
= new (EntryStorage
.Allocate())
151 CachedFileSystemEntry(std::move(Stat
), StoredContents
);
153 return *Insertion
.first
->second
;
156 const CachedFileSystemEntry
&
157 DependencyScanningFilesystemSharedCache::CacheShard::
158 getOrInsertEntryForFilename(StringRef Filename
,
159 const CachedFileSystemEntry
&Entry
) {
160 std::lock_guard
<std::mutex
> LockGuard(CacheLock
);
161 return *EntriesByFilename
.insert({Filename
, &Entry
}).first
->getValue();
164 /// Whitelist file extensions that should be minimized, treating no extension as
165 /// a source file that should be minimized.
167 /// This is kinda hacky, it would be better if we knew what kind of file Clang
168 /// was expecting instead.
169 static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename
) {
170 StringRef Ext
= llvm::sys::path::extension(Filename
);
172 return true; // C++ standard library
173 return llvm::StringSwitch
<bool>(Ext
)
174 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
175 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
176 .CasesLower(".m", ".mm", true)
177 .CasesLower(".i", ".ii", ".mi", ".mmi", true)
178 .CasesLower(".def", ".inc", true)
182 static bool shouldCacheStatFailures(StringRef Filename
) {
183 StringRef Ext
= llvm::sys::path::extension(Filename
);
185 return false; // This may be the module cache directory.
186 // Only cache stat failures on files that are not expected to change during
188 StringRef FName
= llvm::sys::path::filename(Filename
);
189 if (FName
== "module.modulemap" || FName
== "module.map")
191 return shouldScanForDirectivesBasedOnExtension(Filename
);
194 DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
195 DependencyScanningFilesystemSharedCache
&SharedCache
,
196 IntrusiveRefCntPtr
<llvm::vfs::FileSystem
> FS
)
197 : ProxyFileSystem(std::move(FS
)), SharedCache(SharedCache
),
198 WorkingDirForCacheLookup(llvm::errc::invalid_argument
) {
199 updateWorkingDirForCacheLookup();
202 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
203 StringRef Filename
) {
204 return shouldScanForDirectivesBasedOnExtension(Filename
);
207 const CachedFileSystemEntry
&
208 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
209 TentativeEntry TEntry
) {
210 auto &Shard
= SharedCache
.getShardForUID(TEntry
.Status
.getUniqueID());
211 return Shard
.getOrEmplaceEntryForUID(TEntry
.Status
.getUniqueID(),
212 std::move(TEntry
.Status
),
213 std::move(TEntry
.Contents
));
216 const CachedFileSystemEntry
*
217 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
218 StringRef Filename
) {
219 if (const auto *Entry
= LocalCache
.findEntryByFilename(Filename
))
221 auto &Shard
= SharedCache
.getShardForFilename(Filename
);
222 if (const auto *Entry
= Shard
.findEntryByFilename(Filename
))
223 return &LocalCache
.insertEntryForFilename(Filename
, *Entry
);
227 llvm::ErrorOr
<const CachedFileSystemEntry
&>
228 DependencyScanningWorkerFilesystem::computeAndStoreResult(
229 StringRef OriginalFilename
, StringRef FilenameForLookup
) {
230 llvm::ErrorOr
<llvm::vfs::Status
> Stat
=
231 getUnderlyingFS().status(OriginalFilename
);
233 if (!shouldCacheStatFailures(OriginalFilename
))
234 return Stat
.getError();
236 getOrEmplaceSharedEntryForFilename(FilenameForLookup
, Stat
.getError());
237 return insertLocalEntryForFilename(FilenameForLookup
, Entry
);
240 if (const auto *Entry
= findSharedEntryByUID(*Stat
))
241 return insertLocalEntryForFilename(FilenameForLookup
, *Entry
);
244 Stat
->isDirectory() ? TentativeEntry(*Stat
) : readFile(OriginalFilename
);
246 const CachedFileSystemEntry
*SharedEntry
= [&]() {
248 const auto &UIDEntry
= getOrEmplaceSharedEntryForUID(std::move(*TEntry
));
249 return &getOrInsertSharedEntryForFilename(FilenameForLookup
, UIDEntry
);
251 return &getOrEmplaceSharedEntryForFilename(FilenameForLookup
,
255 return insertLocalEntryForFilename(FilenameForLookup
, *SharedEntry
);
258 llvm::ErrorOr
<EntryRef
>
259 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
260 StringRef OriginalFilename
, bool DisableDirectivesScanning
) {
261 StringRef FilenameForLookup
;
262 SmallString
<256> PathBuf
;
263 if (llvm::sys::path::is_absolute_gnu(OriginalFilename
)) {
264 FilenameForLookup
= OriginalFilename
;
265 } else if (!WorkingDirForCacheLookup
) {
266 return WorkingDirForCacheLookup
.getError();
268 StringRef RelFilename
= OriginalFilename
;
269 RelFilename
.consume_front("./");
270 PathBuf
= *WorkingDirForCacheLookup
;
271 llvm::sys::path::append(PathBuf
, RelFilename
);
272 FilenameForLookup
= PathBuf
.str();
274 assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup
));
275 if (const auto *Entry
=
276 findEntryByFilenameWithWriteThrough(FilenameForLookup
))
277 return scanForDirectivesIfNecessary(*Entry
, OriginalFilename
,
278 DisableDirectivesScanning
)
280 auto MaybeEntry
= computeAndStoreResult(OriginalFilename
, FilenameForLookup
);
282 return MaybeEntry
.getError();
283 return scanForDirectivesIfNecessary(*MaybeEntry
, OriginalFilename
,
284 DisableDirectivesScanning
)
288 llvm::ErrorOr
<llvm::vfs::Status
>
289 DependencyScanningWorkerFilesystem::status(const Twine
&Path
) {
290 SmallString
<256> OwnedFilename
;
291 StringRef Filename
= Path
.toStringRef(OwnedFilename
);
293 if (Filename
.ends_with(".pcm"))
294 return getUnderlyingFS().status(Path
);
296 llvm::ErrorOr
<EntryRef
> Result
= getOrCreateFileSystemEntry(Filename
);
298 return Result
.getError();
299 return Result
->getStatus();
304 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
306 class DepScanFile final
: public llvm::vfs::File
{
308 DepScanFile(std::unique_ptr
<llvm::MemoryBuffer
> Buffer
,
309 llvm::vfs::Status Stat
)
310 : Buffer(std::move(Buffer
)), Stat(std::move(Stat
)) {}
312 static llvm::ErrorOr
<std::unique_ptr
<llvm::vfs::File
>> create(EntryRef Entry
);
314 llvm::ErrorOr
<llvm::vfs::Status
> status() override
{ return Stat
; }
316 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>>
317 getBuffer(const Twine
&Name
, int64_t FileSize
, bool RequiresNullTerminator
,
318 bool IsVolatile
) override
{
319 return std::move(Buffer
);
322 std::error_code
close() override
{ return {}; }
325 std::unique_ptr
<llvm::MemoryBuffer
> Buffer
;
326 llvm::vfs::Status Stat
;
329 } // end anonymous namespace
331 llvm::ErrorOr
<std::unique_ptr
<llvm::vfs::File
>>
332 DepScanFile::create(EntryRef Entry
) {
333 assert(!Entry
.isError() && "error");
335 if (Entry
.isDirectory())
336 return std::make_error_code(std::errc::is_a_directory
);
338 auto Result
= std::make_unique
<DepScanFile
>(
339 llvm::MemoryBuffer::getMemBuffer(Entry
.getContents(),
340 Entry
.getStatus().getName(),
341 /*RequiresNullTerminator=*/false),
344 return llvm::ErrorOr
<std::unique_ptr
<llvm::vfs::File
>>(
345 std::unique_ptr
<llvm::vfs::File
>(std::move(Result
)));
348 llvm::ErrorOr
<std::unique_ptr
<llvm::vfs::File
>>
349 DependencyScanningWorkerFilesystem::openFileForRead(const Twine
&Path
) {
350 SmallString
<256> OwnedFilename
;
351 StringRef Filename
= Path
.toStringRef(OwnedFilename
);
353 if (Filename
.ends_with(".pcm"))
354 return getUnderlyingFS().openFileForRead(Path
);
356 llvm::ErrorOr
<EntryRef
> Result
= getOrCreateFileSystemEntry(Filename
);
358 return Result
.getError();
359 return DepScanFile::create(Result
.get());
362 std::error_code
DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
364 std::error_code EC
= ProxyFileSystem::setCurrentWorkingDirectory(Path
);
365 updateWorkingDirForCacheLookup();
369 void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
370 llvm::ErrorOr
<std::string
> CWD
=
371 getUnderlyingFS().getCurrentWorkingDirectory();
373 WorkingDirForCacheLookup
= CWD
.getError();
374 } else if (!llvm::sys::path::is_absolute_gnu(*CWD
)) {
375 WorkingDirForCacheLookup
= llvm::errc::invalid_argument
;
377 WorkingDirForCacheLookup
= *CWD
;
379 assert(!WorkingDirForCacheLookup
||
380 llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup
));