1 //===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the MemoryBuffer interface.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Support/MemoryBuffer.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/Config/config.h"
17 #include "llvm/Support/Alignment.h"
18 #include "llvm/Support/Errc.h"
19 #include "llvm/Support/Error.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/FileSystem.h"
22 #include "llvm/Support/Process.h"
23 #include "llvm/Support/Program.h"
24 #include "llvm/Support/SmallVectorMemoryBuffer.h"
29 #include <sys/types.h>
30 #include <system_error>
31 #if !defined(_MSC_VER) && !defined(__MINGW32__)
38 #include "llvm/Support/AutoConvert.h"
42 //===----------------------------------------------------------------------===//
43 // MemoryBuffer implementation itself.
44 //===----------------------------------------------------------------------===//
// Out-of-line defaulted destructor for the MemoryBuffer base class.
MemoryBuffer::~MemoryBuffer() = default;
48 /// init - Initialize this MemoryBuffer as a reference to externally allocated
49 /// memory, memory that we know is already null terminated.
50 void MemoryBuffer::init(const char *BufStart
, const char *BufEnd
,
51 bool RequiresNullTerminator
) {
52 assert((!RequiresNullTerminator
|| BufEnd
[0] == 0) &&
53 "Buffer is not null terminated!");
54 BufferStart
= BufStart
;
58 //===----------------------------------------------------------------------===//
59 // MemoryBufferMem implementation.
60 //===----------------------------------------------------------------------===//
62 /// CopyStringRef - Copies contents of a StringRef into a block of memory and
63 /// null-terminates it.
64 static void CopyStringRef(char *Memory
, StringRef Data
) {
66 memcpy(Memory
, Data
.data(), Data
.size());
67 Memory
[Data
.size()] = 0; // Null terminate string.
71 struct NamedBufferAlloc
{
73 NamedBufferAlloc(const Twine
&Name
) : Name(Name
) {}
77 void *operator new(size_t N
, const NamedBufferAlloc
&Alloc
) {
78 SmallString
<256> NameBuf
;
79 StringRef NameRef
= Alloc
.Name
.toStringRef(NameBuf
);
81 // We use malloc() and manually handle it returning null instead of calling
82 // operator new because we need all uses of NamedBufferAlloc to be
83 // deallocated with a call to free() due to needing to use malloc() in
84 // WritableMemoryBuffer::getNewUninitMemBuffer() to work around the out-of-
85 // memory handler installed by default in LLVM. See operator delete() member
86 // functions within this file for the paired call to free().
88 static_cast<char *>(std::malloc(N
+ sizeof(size_t) + NameRef
.size() + 1));
90 llvm::report_bad_alloc_error("Allocation failed");
91 *reinterpret_cast<size_t *>(Mem
+ N
) = NameRef
.size();
92 CopyStringRef(Mem
+ N
+ sizeof(size_t), NameRef
);
97 /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
99 class MemoryBufferMem
: public MB
{
101 MemoryBufferMem(StringRef InputData
, bool RequiresNullTerminator
) {
102 MemoryBuffer::init(InputData
.begin(), InputData
.end(),
103 RequiresNullTerminator
);
106 /// Disable sized deallocation for MemoryBufferMem, because it has
107 /// tail-allocated data.
108 void operator delete(void *p
) { std::free(p
); }
110 StringRef
getBufferIdentifier() const override
{
111 // The name is stored after the class itself.
112 return StringRef(reinterpret_cast<const char *>(this + 1) + sizeof(size_t),
113 *reinterpret_cast<const size_t *>(this + 1));
116 MemoryBuffer::BufferKind
getBufferKind() const override
{
117 return MemoryBuffer::MemoryBuffer_Malloc
;
// Forward declaration; the definition appears later in this file, after the
// helpers it depends on.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
           bool IsText, bool RequiresNullTerminator, bool IsVolatile,
           std::optional<Align> Alignment);
128 std::unique_ptr
<MemoryBuffer
>
129 MemoryBuffer::getMemBuffer(StringRef InputData
, StringRef BufferName
,
130 bool RequiresNullTerminator
) {
131 auto *Ret
= new (NamedBufferAlloc(BufferName
))
132 MemoryBufferMem
<MemoryBuffer
>(InputData
, RequiresNullTerminator
);
133 return std::unique_ptr
<MemoryBuffer
>(Ret
);
136 std::unique_ptr
<MemoryBuffer
>
137 MemoryBuffer::getMemBuffer(MemoryBufferRef Ref
, bool RequiresNullTerminator
) {
138 return std::unique_ptr
<MemoryBuffer
>(getMemBuffer(
139 Ref
.getBuffer(), Ref
.getBufferIdentifier(), RequiresNullTerminator
));
142 static ErrorOr
<std::unique_ptr
<WritableMemoryBuffer
>>
143 getMemBufferCopyImpl(StringRef InputData
, const Twine
&BufferName
) {
145 WritableMemoryBuffer::getNewUninitMemBuffer(InputData
.size(), BufferName
);
147 return make_error_code(errc::not_enough_memory
);
148 // Calling memcpy with null src/dst is UB, and an empty StringRef is
149 // represented with {nullptr, 0}.
150 llvm::copy(InputData
, Buf
->getBufferStart());
151 return std::move(Buf
);
154 std::unique_ptr
<MemoryBuffer
>
155 MemoryBuffer::getMemBufferCopy(StringRef InputData
, const Twine
&BufferName
) {
156 auto Buf
= getMemBufferCopyImpl(InputData
, BufferName
);
158 return std::move(*Buf
);
162 ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
163 MemoryBuffer::getFileOrSTDIN(const Twine
&Filename
, bool IsText
,
164 bool RequiresNullTerminator
,
165 std::optional
<Align
> Alignment
) {
166 SmallString
<256> NameBuf
;
167 StringRef NameRef
= Filename
.toStringRef(NameBuf
);
171 return getFile(Filename
, IsText
, RequiresNullTerminator
,
172 /*IsVolatile=*/false, Alignment
);
175 ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
176 MemoryBuffer::getFileSlice(const Twine
&FilePath
, uint64_t MapSize
,
177 uint64_t Offset
, bool IsVolatile
,
178 std::optional
<Align
> Alignment
) {
179 return getFileAux
<MemoryBuffer
>(FilePath
, MapSize
, Offset
, /*IsText=*/false,
180 /*RequiresNullTerminator=*/false, IsVolatile
,
184 //===----------------------------------------------------------------------===//
185 // MemoryBuffer::getFile implementation.
186 //===----------------------------------------------------------------------===//
190 template <typename MB
>
191 constexpr sys::fs::mapped_file_region::mapmode Mapmode
=
192 sys::fs::mapped_file_region::readonly
;
194 constexpr sys::fs::mapped_file_region::mapmode Mapmode
<MemoryBuffer
> =
195 sys::fs::mapped_file_region::readonly
;
197 constexpr sys::fs::mapped_file_region::mapmode Mapmode
<WritableMemoryBuffer
> =
198 sys::fs::mapped_file_region::priv
;
200 constexpr sys::fs::mapped_file_region::mapmode
201 Mapmode
<WriteThroughMemoryBuffer
> = sys::fs::mapped_file_region::readwrite
;
203 /// Memory maps a file descriptor using sys::fs::mapped_file_region.
205 /// This handles converting the offset into a legal offset on the platform.
206 template<typename MB
>
207 class MemoryBufferMMapFile
: public MB
{
208 sys::fs::mapped_file_region MFR
;
210 static uint64_t getLegalMapOffset(uint64_t Offset
) {
211 return Offset
& ~(sys::fs::mapped_file_region::alignment() - 1);
214 static uint64_t getLegalMapSize(uint64_t Len
, uint64_t Offset
) {
215 return Len
+ (Offset
- getLegalMapOffset(Offset
));
218 const char *getStart(uint64_t Len
, uint64_t Offset
) {
219 return MFR
.const_data() + (Offset
- getLegalMapOffset(Offset
));
223 MemoryBufferMMapFile(bool RequiresNullTerminator
, sys::fs::file_t FD
, uint64_t Len
,
224 uint64_t Offset
, std::error_code
&EC
)
225 : MFR(FD
, Mapmode
<MB
>, getLegalMapSize(Len
, Offset
),
226 getLegalMapOffset(Offset
), EC
) {
228 const char *Start
= getStart(Len
, Offset
);
229 MemoryBuffer::init(Start
, Start
+ Len
, RequiresNullTerminator
);
233 /// Disable sized deallocation for MemoryBufferMMapFile, because it has
234 /// tail-allocated data.
235 void operator delete(void *p
) { std::free(p
); }
237 StringRef
getBufferIdentifier() const override
{
238 // The name is stored after the class itself.
239 return StringRef(reinterpret_cast<const char *>(this + 1) + sizeof(size_t),
240 *reinterpret_cast<const size_t *>(this + 1));
243 MemoryBuffer::BufferKind
getBufferKind() const override
{
244 return MemoryBuffer::MemoryBuffer_MMap
;
247 void dontNeedIfMmap() override
{ MFR
.dontNeed(); }
251 static ErrorOr
<std::unique_ptr
<WritableMemoryBuffer
>>
252 getMemoryBufferForStream(sys::fs::file_t FD
, const Twine
&BufferName
) {
253 SmallString
<sys::fs::DefaultReadChunkSize
> Buffer
;
254 if (Error E
= sys::fs::readNativeFileToEOF(FD
, Buffer
))
255 return errorToErrorCode(std::move(E
));
256 return getMemBufferCopyImpl(Buffer
, BufferName
);
259 ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
260 MemoryBuffer::getFile(const Twine
&Filename
, bool IsText
,
261 bool RequiresNullTerminator
, bool IsVolatile
,
262 std::optional
<Align
> Alignment
) {
263 return getFileAux
<MemoryBuffer
>(Filename
, /*MapSize=*/-1, /*Offset=*/0,
264 IsText
, RequiresNullTerminator
, IsVolatile
,
// Forward declaration; getFileAux below calls this before its definition
// appears later in the file.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
                bool IsVolatile, std::optional<Align> Alignment);
274 template <typename MB
>
275 static ErrorOr
<std::unique_ptr
<MB
>>
276 getFileAux(const Twine
&Filename
, uint64_t MapSize
, uint64_t Offset
,
277 bool IsText
, bool RequiresNullTerminator
, bool IsVolatile
,
278 std::optional
<Align
> Alignment
) {
279 Expected
<sys::fs::file_t
> FDOrErr
= sys::fs::openNativeFileForRead(
280 Filename
, IsText
? sys::fs::OF_TextWithCRLF
: sys::fs::OF_None
);
282 return errorToErrorCode(FDOrErr
.takeError());
283 sys::fs::file_t FD
= *FDOrErr
;
284 auto Ret
= getOpenFileImpl
<MB
>(FD
, Filename
, /*FileSize=*/-1, MapSize
, Offset
,
285 RequiresNullTerminator
, IsVolatile
, Alignment
);
286 sys::fs::closeFile(FD
);
290 ErrorOr
<std::unique_ptr
<WritableMemoryBuffer
>>
291 WritableMemoryBuffer::getFile(const Twine
&Filename
, bool IsVolatile
,
292 std::optional
<Align
> Alignment
) {
293 return getFileAux
<WritableMemoryBuffer
>(
294 Filename
, /*MapSize=*/-1, /*Offset=*/0, /*IsText=*/false,
295 /*RequiresNullTerminator=*/false, IsVolatile
, Alignment
);
298 ErrorOr
<std::unique_ptr
<WritableMemoryBuffer
>>
299 WritableMemoryBuffer::getFileSlice(const Twine
&Filename
, uint64_t MapSize
,
300 uint64_t Offset
, bool IsVolatile
,
301 std::optional
<Align
> Alignment
) {
302 return getFileAux
<WritableMemoryBuffer
>(
303 Filename
, MapSize
, Offset
, /*IsText=*/false,
304 /*RequiresNullTerminator=*/false, IsVolatile
, Alignment
);
307 std::unique_ptr
<WritableMemoryBuffer
>
308 WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size
,
309 const Twine
&BufferName
,
310 std::optional
<Align
> Alignment
) {
311 using MemBuffer
= MemoryBufferMem
<WritableMemoryBuffer
>;
313 // Use 16-byte alignment if no alignment is specified.
314 Align BufAlign
= Alignment
.value_or(Align(16));
316 // Allocate space for the MemoryBuffer, the data and the name. It is important
317 // that MemoryBuffer and data are aligned so PointerIntPair works with them.
318 SmallString
<256> NameBuf
;
319 StringRef NameRef
= BufferName
.toStringRef(NameBuf
);
321 size_t StringLen
= sizeof(MemBuffer
) + sizeof(size_t) + NameRef
.size() + 1;
322 size_t RealLen
= StringLen
+ Size
+ 1 + BufAlign
.value();
323 if (RealLen
<= Size
) // Check for rollover.
325 // We use a call to malloc() rather than a call to a non-throwing operator
326 // new() because LLVM unconditionally installs an out of memory new handler
327 // when exceptions are disabled. This new handler intentionally crashes to
328 // aid with debugging, but that makes non-throwing new calls unhelpful.
329 // See MemoryBufferMem::operator delete() for the paired call to free(), and
330 // llvm::install_out_of_memory_new_handler() for the installation of the
331 // custom new handler.
332 char *Mem
= static_cast<char *>(std::malloc(RealLen
));
336 // The name is stored after the class itself.
337 *reinterpret_cast<size_t *>(Mem
+ sizeof(MemBuffer
)) = NameRef
.size();
338 CopyStringRef(Mem
+ sizeof(MemBuffer
) + sizeof(size_t), NameRef
);
340 // The buffer begins after the name and must be aligned.
341 char *Buf
= (char *)alignAddr(Mem
+ StringLen
, BufAlign
);
342 Buf
[Size
] = 0; // Null terminate buffer.
344 auto *Ret
= new (Mem
) MemBuffer(StringRef(Buf
, Size
), true);
345 return std::unique_ptr
<WritableMemoryBuffer
>(Ret
);
348 std::unique_ptr
<WritableMemoryBuffer
>
349 WritableMemoryBuffer::getNewMemBuffer(size_t Size
, const Twine
&BufferName
) {
350 auto SB
= WritableMemoryBuffer::getNewUninitMemBuffer(Size
, BufferName
);
353 memset(SB
->getBufferStart(), 0, Size
);
357 static bool shouldUseMmap(sys::fs::file_t FD
,
361 bool RequiresNullTerminator
,
365 // zOS Enhanced ASCII auto convert does not support mmap.
369 // mmap may leave the buffer without null terminator if the file size changed
370 // by the time the last page is mapped in, so avoid it if the file size is
372 if (IsVolatile
&& RequiresNullTerminator
)
375 // We don't use mmap for small files because this can severely fragment our
377 if (MapSize
< 4 * 4096 || MapSize
< (unsigned)PageSize
)
380 if (!RequiresNullTerminator
)
383 // If we don't know the file size, use fstat to find out. fstat on an open
384 // file descriptor is cheaper than stat on a random path.
385 // FIXME: this chunk of code is duplicated, but it avoids a fstat when
386 // RequiresNullTerminator = false and MapSize != -1.
387 if (FileSize
== size_t(-1)) {
388 sys::fs::file_status Status
;
389 if (sys::fs::status(FD
, Status
))
391 FileSize
= Status
.getSize();
394 // If we need a null terminator and the end of the map is inside the file,
395 // we cannot use mmap.
396 size_t End
= Offset
+ MapSize
;
397 assert(End
<= FileSize
);
401 // Don't try to map files that are exactly a multiple of the system page size
402 // if we need a null terminator.
403 if ((FileSize
& (PageSize
-1)) == 0)
406 #if defined(__CYGWIN__)
407 // Don't try to map files that are exactly a multiple of the physical page size
408 // if we need a null terminator.
409 // FIXME: We should reorganize again getPageSize() on Win32.
410 if ((FileSize
& (4096 - 1)) == 0)
417 static ErrorOr
<std::unique_ptr
<WriteThroughMemoryBuffer
>>
418 getReadWriteFile(const Twine
&Filename
, uint64_t FileSize
, uint64_t MapSize
,
420 Expected
<sys::fs::file_t
> FDOrErr
= sys::fs::openNativeFileForReadWrite(
421 Filename
, sys::fs::CD_OpenExisting
, sys::fs::OF_None
);
423 return errorToErrorCode(FDOrErr
.takeError());
424 sys::fs::file_t FD
= *FDOrErr
;
426 // Default is to map the full file.
427 if (MapSize
== uint64_t(-1)) {
428 // If we don't know the file size, use fstat to find out. fstat on an open
429 // file descriptor is cheaper than stat on a random path.
430 if (FileSize
== uint64_t(-1)) {
431 sys::fs::file_status Status
;
432 std::error_code EC
= sys::fs::status(FD
, Status
);
436 // If this not a file or a block device (e.g. it's a named pipe
437 // or character device), we can't mmap it, so error out.
438 sys::fs::file_type Type
= Status
.type();
439 if (Type
!= sys::fs::file_type::regular_file
&&
440 Type
!= sys::fs::file_type::block_file
)
441 return make_error_code(errc::invalid_argument
);
443 FileSize
= Status
.getSize();
449 std::unique_ptr
<WriteThroughMemoryBuffer
> Result(
450 new (NamedBufferAlloc(Filename
))
451 MemoryBufferMMapFile
<WriteThroughMemoryBuffer
>(false, FD
, MapSize
,
455 return std::move(Result
);
458 ErrorOr
<std::unique_ptr
<WriteThroughMemoryBuffer
>>
459 WriteThroughMemoryBuffer::getFile(const Twine
&Filename
, int64_t FileSize
) {
460 return getReadWriteFile(Filename
, FileSize
, FileSize
, 0);
463 /// Map a subrange of the specified file as a WritableMemoryBuffer.
464 ErrorOr
<std::unique_ptr
<WriteThroughMemoryBuffer
>>
465 WriteThroughMemoryBuffer::getFileSlice(const Twine
&Filename
, uint64_t MapSize
,
467 return getReadWriteFile(Filename
, -1, MapSize
, Offset
);
470 template <typename MB
>
471 static ErrorOr
<std::unique_ptr
<MB
>>
472 getOpenFileImpl(sys::fs::file_t FD
, const Twine
&Filename
, uint64_t FileSize
,
473 uint64_t MapSize
, int64_t Offset
, bool RequiresNullTerminator
,
474 bool IsVolatile
, std::optional
<Align
> Alignment
) {
475 static int PageSize
= sys::Process::getPageSizeEstimate();
477 // Default is to map the full file.
478 if (MapSize
== uint64_t(-1)) {
479 // If we don't know the file size, use fstat to find out. fstat on an open
480 // file descriptor is cheaper than stat on a random path.
481 if (FileSize
== uint64_t(-1)) {
482 sys::fs::file_status Status
;
483 std::error_code EC
= sys::fs::status(FD
, Status
);
487 // If this not a file or a block device (e.g. it's a named pipe
488 // or character device), we can't trust the size. Create the memory
489 // buffer by copying off the stream.
490 sys::fs::file_type Type
= Status
.type();
491 if (Type
!= sys::fs::file_type::regular_file
&&
492 Type
!= sys::fs::file_type::block_file
)
493 return getMemoryBufferForStream(FD
, Filename
);
495 FileSize
= Status
.getSize();
500 if (shouldUseMmap(FD
, FileSize
, MapSize
, Offset
, RequiresNullTerminator
,
501 PageSize
, IsVolatile
)) {
503 std::unique_ptr
<MB
> Result(
504 new (NamedBufferAlloc(Filename
)) MemoryBufferMMapFile
<MB
>(
505 RequiresNullTerminator
, FD
, MapSize
, Offset
, EC
));
507 return std::move(Result
);
511 ErrorOr
<bool> NeedConversion
= needzOSConversion(Filename
.str().c_str(), FD
);
512 if (std::error_code EC
= NeedConversion
.getError())
514 // File size may increase due to EBCDIC -> UTF-8 conversion, therefore we
515 // cannot trust the file size and we create the memory buffer by copying
517 // Note: This only works with the assumption of reading a full file (i.e,
518 // Offset == 0 and MapSize == FileSize). Reading a file slice does not work.
519 if (Offset
== 0 && MapSize
== FileSize
&& *NeedConversion
)
520 return getMemoryBufferForStream(FD
, Filename
);
524 WritableMemoryBuffer::getNewUninitMemBuffer(MapSize
, Filename
, Alignment
);
526 // Failed to create a buffer. The only way it can fail is if
527 // new(std::nothrow) returns 0.
528 return make_error_code(errc::not_enough_memory
);
531 // Read until EOF, zero-initialize the rest.
532 MutableArrayRef
<char> ToRead
= Buf
->getBuffer();
533 while (!ToRead
.empty()) {
534 Expected
<size_t> ReadBytes
=
535 sys::fs::readNativeFileSlice(FD
, ToRead
, Offset
);
537 return errorToErrorCode(ReadBytes
.takeError());
538 if (*ReadBytes
== 0) {
539 std::memset(ToRead
.data(), 0, ToRead
.size());
542 ToRead
= ToRead
.drop_front(*ReadBytes
);
543 Offset
+= *ReadBytes
;
546 return std::move(Buf
);
549 ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
550 MemoryBuffer::getOpenFile(sys::fs::file_t FD
, const Twine
&Filename
,
551 uint64_t FileSize
, bool RequiresNullTerminator
,
552 bool IsVolatile
, std::optional
<Align
> Alignment
) {
553 return getOpenFileImpl
<MemoryBuffer
>(FD
, Filename
, FileSize
, FileSize
, 0,
554 RequiresNullTerminator
, IsVolatile
,
558 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MemoryBuffer::getOpenFileSlice(
559 sys::fs::file_t FD
, const Twine
&Filename
, uint64_t MapSize
, int64_t Offset
,
560 bool IsVolatile
, std::optional
<Align
> Alignment
) {
561 assert(MapSize
!= uint64_t(-1));
562 return getOpenFileImpl
<MemoryBuffer
>(FD
, Filename
, -1, MapSize
, Offset
, false,
563 IsVolatile
, Alignment
);
566 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MemoryBuffer::getSTDIN() {
567 // Read in all of the data from stdin, we cannot mmap stdin.
569 // FIXME: That isn't necessarily true, we should try to mmap stdin and
570 // fallback if it fails.
571 sys::ChangeStdinMode(sys::fs::OF_Text
);
573 return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
576 ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
577 MemoryBuffer::getFileAsStream(const Twine
&Filename
) {
578 Expected
<sys::fs::file_t
> FDOrErr
=
579 sys::fs::openNativeFileForRead(Filename
, sys::fs::OF_None
);
581 return errorToErrorCode(FDOrErr
.takeError());
582 sys::fs::file_t FD
= *FDOrErr
;
583 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> Ret
=
584 getMemoryBufferForStream(FD
, Filename
);
585 sys::fs::closeFile(FD
);
589 MemoryBufferRef
MemoryBuffer::getMemBufferRef() const {
590 StringRef Data
= getBuffer();
591 StringRef Identifier
= getBufferIdentifier();
592 return MemoryBufferRef(Data
, Identifier
);
// Out-of-line defaulted destructor for SmallVectorMemoryBuffer.
SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() = default;