1 //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the pruning of a directory based on least recently used.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Support/CachePruning.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/Debug.h"
16 #include "llvm/Support/Errc.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/WithColor.h"
21 #include "llvm/Support/raw_ostream.h"
23 #define DEBUG_TYPE "cache-pruning"
26 #include <system_error>
32 sys::TimePoint
<> Time
;
36 /// Used to determine which files to prune first. Also used to determine
37 /// set membership, so must take into account all fields.
38 bool operator<(const FileInfo
&Other
) const {
39 return std::tie(Time
, Other
.Size
, Path
) <
40 std::tie(Other
.Time
, Size
, Other
.Path
);
43 } // anonymous namespace
45 /// Write a new timestamp file with the given path. This is used for the pruning
47 static void writeTimestampFile(StringRef TimestampFile
) {
49 raw_fd_ostream
Out(TimestampFile
.str(), EC
, sys::fs::OF_None
);
52 static Expected
<std::chrono::seconds
> parseDuration(StringRef Duration
) {
54 return make_error
<StringError
>("Duration must not be empty",
55 inconvertibleErrorCode());
57 StringRef NumStr
= Duration
.slice(0, Duration
.size()-1);
59 if (NumStr
.getAsInteger(0, Num
))
60 return make_error
<StringError
>("'" + NumStr
+ "' not an integer",
61 inconvertibleErrorCode());
63 switch (Duration
.back()) {
65 return std::chrono::seconds(Num
);
67 return std::chrono::minutes(Num
);
69 return std::chrono::hours(Num
);
71 return make_error
<StringError
>("'" + Duration
+
72 "' must end with one of 's', 'm' or 'h'",
73 inconvertibleErrorCode());
77 Expected
<CachePruningPolicy
>
78 llvm::parseCachePruningPolicy(StringRef PolicyStr
) {
79 CachePruningPolicy Policy
;
80 std::pair
<StringRef
, StringRef
> P
= {"", PolicyStr
};
81 while (!P
.second
.empty()) {
82 P
= P
.second
.split(':');
85 std::tie(Key
, Value
) = P
.first
.split('=');
86 if (Key
== "prune_interval") {
87 auto DurationOrErr
= parseDuration(Value
);
89 return DurationOrErr
.takeError();
90 Policy
.Interval
= *DurationOrErr
;
91 } else if (Key
== "prune_after") {
92 auto DurationOrErr
= parseDuration(Value
);
94 return DurationOrErr
.takeError();
95 Policy
.Expiration
= *DurationOrErr
;
96 } else if (Key
== "cache_size") {
97 if (Value
.back() != '%')
98 return make_error
<StringError
>("'" + Value
+ "' must be a percentage",
99 inconvertibleErrorCode());
100 StringRef SizeStr
= Value
.drop_back();
102 if (SizeStr
.getAsInteger(0, Size
))
103 return make_error
<StringError
>("'" + SizeStr
+ "' not an integer",
104 inconvertibleErrorCode());
106 return make_error
<StringError
>("'" + SizeStr
+
107 "' must be between 0 and 100",
108 inconvertibleErrorCode());
109 Policy
.MaxSizePercentageOfAvailableSpace
= Size
;
110 } else if (Key
== "cache_size_bytes") {
112 switch (tolower(Value
.back())) {
115 Value
= Value
.drop_back();
119 Value
= Value
.drop_back();
122 Mult
= 1024 * 1024 * 1024;
123 Value
= Value
.drop_back();
127 if (Value
.getAsInteger(0, Size
))
128 return make_error
<StringError
>("'" + Value
+ "' not an integer",
129 inconvertibleErrorCode());
130 Policy
.MaxSizeBytes
= Size
* Mult
;
131 } else if (Key
== "cache_size_files") {
132 if (Value
.getAsInteger(0, Policy
.MaxSizeFiles
))
133 return make_error
<StringError
>("'" + Value
+ "' not an integer",
134 inconvertibleErrorCode());
136 return make_error
<StringError
>("Unknown key: '" + Key
+ "'",
137 inconvertibleErrorCode());
144 /// Prune the cache of files that haven't been accessed in a long time.
145 bool llvm::pruneCache(StringRef Path
, CachePruningPolicy Policy
,
146 const std::vector
<std::unique_ptr
<MemoryBuffer
>> &Files
) {
147 using namespace std::chrono
;
153 if (sys::fs::is_directory(Path
, isPathDir
))
159 Policy
.MaxSizePercentageOfAvailableSpace
=
160 std::min(Policy
.MaxSizePercentageOfAvailableSpace
, 100u);
162 if (Policy
.Expiration
== seconds(0) &&
163 Policy
.MaxSizePercentageOfAvailableSpace
== 0 &&
164 Policy
.MaxSizeBytes
== 0 && Policy
.MaxSizeFiles
== 0) {
165 LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n");
166 // Nothing will be pruned, early exit
170 // Try to stat() the timestamp file.
171 SmallString
<128> TimestampFile(Path
);
172 sys::path::append(TimestampFile
, "llvmcache.timestamp");
173 sys::fs::file_status FileStatus
;
174 const auto CurrentTime
= system_clock::now();
175 if (auto EC
= sys::fs::status(TimestampFile
, FileStatus
)) {
176 if (EC
== errc::no_such_file_or_directory
) {
177 // If the timestamp file wasn't there, create one now.
178 writeTimestampFile(TimestampFile
);
184 if (!Policy
.Interval
)
186 if (Policy
.Interval
!= seconds(0)) {
187 // Check whether the time stamp is older than our pruning interval.
188 // If not, do nothing.
189 const auto TimeStampModTime
= FileStatus
.getLastModificationTime();
190 auto TimeStampAge
= CurrentTime
- TimeStampModTime
;
191 if (TimeStampAge
<= *Policy
.Interval
) {
192 LLVM_DEBUG(dbgs() << "Timestamp file too recent ("
193 << duration_cast
<seconds
>(TimeStampAge
).count()
194 << "s old), do not prune.\n");
198 // Write a new timestamp file so that nobody else attempts to prune.
199 // There is a benign race condition here, if two processes happen to
200 // notice at the same time that the timestamp is out-of-date.
201 writeTimestampFile(TimestampFile
);
204 // Keep track of files to delete to get below the size limit.
205 // Order by time of last use so that recently used files are preserved.
206 std::set
<FileInfo
> FileInfos
;
207 uint64_t TotalSize
= 0;
209 // Walk the entire directory cache, looking for unused files.
211 SmallString
<128> CachePathNative
;
212 sys::path::native(Path
, CachePathNative
);
213 // Walk all of the files within this directory.
214 for (sys::fs::directory_iterator
File(CachePathNative
, EC
), FileEnd
;
215 File
!= FileEnd
&& !EC
; File
.increment(EC
)) {
216 // Ignore filenames not beginning with "llvmcache-" or "Thin-". This
217 // includes the timestamp file as well as any files created by the user.
218 // This acts as a safeguard against data loss if the user specifies the
219 // wrong directory as their cache directory.
220 StringRef filename
= sys::path::filename(File
->path());
221 if (!filename
.startswith("llvmcache-") && !filename
.startswith("Thin-"))
224 // Look at this file. If we can't stat it, there's nothing interesting
226 ErrorOr
<sys::fs::basic_file_status
> StatusOrErr
= File
->status();
228 LLVM_DEBUG(dbgs() << "Ignore " << File
->path() << " (can't stat)\n");
232 // If the file hasn't been used recently enough, delete it
233 const auto FileAccessTime
= StatusOrErr
->getLastAccessedTime();
234 auto FileAge
= CurrentTime
- FileAccessTime
;
235 if (Policy
.Expiration
!= seconds(0) && FileAge
> Policy
.Expiration
) {
236 LLVM_DEBUG(dbgs() << "Remove " << File
->path() << " ("
237 << duration_cast
<seconds
>(FileAge
).count()
239 sys::fs::remove(File
->path());
243 // Leave it here for now, but add it to the list of size-based pruning.
244 TotalSize
+= StatusOrErr
->getSize();
245 FileInfos
.insert({FileAccessTime
, StatusOrErr
->getSize(), File
->path()});
248 auto FileInfo
= FileInfos
.begin();
249 size_t NumFiles
= FileInfos
.size();
251 auto RemoveCacheFile
= [&]() {
253 sys::fs::remove(FileInfo
->Path
);
255 TotalSize
-= FileInfo
->Size
;
257 LLVM_DEBUG(dbgs() << " - Remove " << FileInfo
->Path
<< " (size "
258 << FileInfo
->Size
<< "), new occupancy is " << TotalSize
263 // files.size() is greater the number of inputs by one. However, a timestamp
264 // file is created and stored in the cache directory if --thinlto-cache-policy
265 // option is used. Therefore, files.size() is used as ActualNums.
266 const size_t ActualNums
= Files
.size();
267 if (Policy
.MaxSizeFiles
&& ActualNums
> Policy
.MaxSizeFiles
)
269 << "ThinLTO cache pruning happens since the number of created files ("
270 << ActualNums
<< ") exceeds the maximum number of files ("
271 << Policy
.MaxSizeFiles
272 << "); consider adjusting --thinlto-cache-policy\n";
274 // Prune for number of files.
275 if (Policy
.MaxSizeFiles
)
276 while (NumFiles
> Policy
.MaxSizeFiles
)
279 // Prune for size now if needed
280 if (Policy
.MaxSizePercentageOfAvailableSpace
> 0 || Policy
.MaxSizeBytes
> 0) {
281 auto ErrOrSpaceInfo
= sys::fs::disk_space(Path
);
282 if (!ErrOrSpaceInfo
) {
283 report_fatal_error("Can't get available size");
285 sys::fs::space_info SpaceInfo
= ErrOrSpaceInfo
.get();
286 auto AvailableSpace
= TotalSize
+ SpaceInfo
.free
;
288 if (Policy
.MaxSizePercentageOfAvailableSpace
== 0)
289 Policy
.MaxSizePercentageOfAvailableSpace
= 100;
290 if (Policy
.MaxSizeBytes
== 0)
291 Policy
.MaxSizeBytes
= AvailableSpace
;
292 auto TotalSizeTarget
= std::min
<uint64_t>(
293 AvailableSpace
* Policy
.MaxSizePercentageOfAvailableSpace
/ 100ull,
294 Policy
.MaxSizeBytes
);
296 LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize
) / AvailableSpace
)
298 << Policy
.MaxSizePercentageOfAvailableSpace
<< "%, "
299 << Policy
.MaxSizeBytes
<< " bytes\n");
301 size_t ActualSizes
= 0;
302 for (const auto &File
: Files
)
304 ActualSizes
+= File
->getBufferSize();
306 if (ActualSizes
> TotalSizeTarget
)
308 << "ThinLTO cache pruning happens since the total size of the cache "
309 "files consumed by the current link job ("
310 << ActualSizes
<< " bytes) exceeds maximum cache size ("
312 << " bytes); consider adjusting --thinlto-cache-policy\n";
314 // Remove the oldest accessed files first, till we get below the threshold.
315 while (TotalSize
> TotalSizeTarget
&& FileInfo
!= FileInfos
.end())