1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the writeArchive function.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Object/ArchiveWriter.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/BinaryFormat/Magic.h"
18 #include "llvm/IR/LLVMContext.h"
19 #include "llvm/Object/Archive.h"
20 #include "llvm/Object/ObjectFile.h"
21 #include "llvm/Object/SymbolicFile.h"
22 #include "llvm/Support/EndianStream.h"
23 #include "llvm/Support/Errc.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/Format.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/ToolOutputFile.h"
28 #include "llvm/Support/raw_ostream.h"
32 #if !defined(_MSC_VER) && !defined(__MINGW32__)
40 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef
)
41 : Buf(MemoryBuffer::getMemBuffer(BufRef
, false)),
42 MemberName(BufRef
.getBufferIdentifier()) {}
44 Expected
<NewArchiveMember
>
45 NewArchiveMember::getOldMember(const object::Archive::Child
&OldMember
,
47 Expected
<llvm::MemoryBufferRef
> BufOrErr
= OldMember
.getMemoryBufferRef();
49 return BufOrErr
.takeError();
52 assert(M
.IsNew
== false);
53 M
.Buf
= MemoryBuffer::getMemBuffer(*BufOrErr
, false);
54 M
.MemberName
= M
.Buf
->getBufferIdentifier();
56 auto ModTimeOrErr
= OldMember
.getLastModified();
58 return ModTimeOrErr
.takeError();
59 M
.ModTime
= ModTimeOrErr
.get();
60 Expected
<unsigned> UIDOrErr
= OldMember
.getUID();
62 return UIDOrErr
.takeError();
63 M
.UID
= UIDOrErr
.get();
64 Expected
<unsigned> GIDOrErr
= OldMember
.getGID();
66 return GIDOrErr
.takeError();
67 M
.GID
= GIDOrErr
.get();
68 Expected
<sys::fs::perms
> AccessModeOrErr
= OldMember
.getAccessMode();
70 return AccessModeOrErr
.takeError();
71 M
.Perms
= AccessModeOrErr
.get();
76 Expected
<NewArchiveMember
> NewArchiveMember::getFile(StringRef FileName
,
78 sys::fs::file_status Status
;
80 if (auto EC
= sys::fs::openFileForRead(FileName
, FD
))
81 return errorCodeToError(EC
);
84 if (auto EC
= sys::fs::status(FD
, Status
))
85 return errorCodeToError(EC
);
87 // Opening a directory doesn't make sense. Let it fail.
88 // Linux cannot open directories with open(2), although
89 // cygwin and *bsd can.
90 if (Status
.type() == sys::fs::file_type::directory_file
)
91 return errorCodeToError(make_error_code(errc::is_a_directory
));
93 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MemberBufferOrErr
=
94 MemoryBuffer::getOpenFile(FD
, FileName
, Status
.getSize(), false);
95 if (!MemberBufferOrErr
)
96 return errorCodeToError(MemberBufferOrErr
.getError());
99 return errorCodeToError(std::error_code(errno
, std::generic_category()));
103 M
.Buf
= std::move(*MemberBufferOrErr
);
104 M
.MemberName
= M
.Buf
->getBufferIdentifier();
105 if (!Deterministic
) {
106 M
.ModTime
= std::chrono::time_point_cast
<std::chrono::seconds
>(
107 Status
.getLastModificationTime());
108 M
.UID
= Status
.getUser();
109 M
.GID
= Status
.getGroup();
110 M
.Perms
= Status
.permissions();
115 template <typename T
>
116 static void printWithSpacePadding(raw_ostream
&OS
, T Data
, unsigned Size
) {
117 uint64_t OldPos
= OS
.tell();
119 unsigned SizeSoFar
= OS
.tell() - OldPos
;
120 assert(SizeSoFar
<= Size
&& "Data doesn't fit in Size");
121 OS
.indent(Size
- SizeSoFar
);
124 static bool isDarwin(object::Archive::Kind Kind
) {
125 return Kind
== object::Archive::K_DARWIN
||
126 Kind
== object::Archive::K_DARWIN64
;
129 static bool isBSDLike(object::Archive::Kind Kind
) {
131 case object::Archive::K_GNU
:
132 case object::Archive::K_GNU64
:
134 case object::Archive::K_BSD
:
135 case object::Archive::K_DARWIN
:
136 case object::Archive::K_DARWIN64
:
138 case object::Archive::K_COFF
:
141 llvm_unreachable("not supported for writting");
145 static void print(raw_ostream
&Out
, object::Archive::Kind Kind
, T Val
) {
146 support::endian::write(Out
, Val
,
147 isBSDLike(Kind
) ? support::little
: support::big
);
150 static void printRestOfMemberHeader(
151 raw_ostream
&Out
, const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
152 unsigned UID
, unsigned GID
, unsigned Perms
, unsigned Size
) {
153 printWithSpacePadding(Out
, sys::toTimeT(ModTime
), 12);
155 // The format has only 6 chars for uid and gid. Truncate if the provided
157 printWithSpacePadding(Out
, UID
% 1000000, 6);
158 printWithSpacePadding(Out
, GID
% 1000000, 6);
160 printWithSpacePadding(Out
, format("%o", Perms
), 8);
161 printWithSpacePadding(Out
, Size
, 10);
166 printGNUSmallMemberHeader(raw_ostream
&Out
, StringRef Name
,
167 const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
168 unsigned UID
, unsigned GID
, unsigned Perms
,
170 printWithSpacePadding(Out
, Twine(Name
) + "/", 16);
171 printRestOfMemberHeader(Out
, ModTime
, UID
, GID
, Perms
, Size
);
175 printBSDMemberHeader(raw_ostream
&Out
, uint64_t Pos
, StringRef Name
,
176 const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
177 unsigned UID
, unsigned GID
, unsigned Perms
,
179 uint64_t PosAfterHeader
= Pos
+ 60 + Name
.size();
180 // Pad so that even 64 bit object files are aligned.
181 unsigned Pad
= OffsetToAlignment(PosAfterHeader
, 8);
182 unsigned NameWithPadding
= Name
.size() + Pad
;
183 printWithSpacePadding(Out
, Twine("#1/") + Twine(NameWithPadding
), 16);
184 printRestOfMemberHeader(Out
, ModTime
, UID
, GID
, Perms
,
185 NameWithPadding
+ Size
);
188 Out
.write(uint8_t(0));
191 static bool useStringTable(bool Thin
, StringRef Name
) {
192 return Thin
|| Name
.size() >= 16 || Name
.contains('/');
195 // Compute the relative path from From to To.
196 static std::string
computeRelativePath(StringRef From
, StringRef To
) {
197 if (sys::path::is_absolute(From
) || sys::path::is_absolute(To
))
200 StringRef DirFrom
= sys::path::parent_path(From
);
201 auto FromI
= sys::path::begin(DirFrom
);
202 auto ToI
= sys::path::begin(To
);
203 while (*FromI
== *ToI
) {
208 SmallString
<128> Relative
;
209 for (auto FromE
= sys::path::end(DirFrom
); FromI
!= FromE
; ++FromI
)
210 sys::path::append(Relative
, "..");
212 for (auto ToE
= sys::path::end(To
); ToI
!= ToE
; ++ToI
)
213 sys::path::append(Relative
, *ToI
);
216 // Replace backslashes with slashes so that the path is portable between *nix
218 std::replace(Relative
.begin(), Relative
.end(), '\\', '/');
221 return Relative
.str();
224 static bool is64BitKind(object::Archive::Kind Kind
) {
226 case object::Archive::K_GNU
:
227 case object::Archive::K_BSD
:
228 case object::Archive::K_DARWIN
:
229 case object::Archive::K_COFF
:
231 case object::Archive::K_DARWIN64
:
232 case object::Archive::K_GNU64
:
235 llvm_unreachable("not supported for writting");
238 static void addToStringTable(raw_ostream
&Out
, StringRef ArcName
,
239 const NewArchiveMember
&M
, bool Thin
) {
240 StringRef ID
= M
.Buf
->getBufferIdentifier();
243 Out
<< computeRelativePath(ArcName
, ID
);
251 static void printMemberHeader(raw_ostream
&Out
, uint64_t Pos
,
252 raw_ostream
&StringTable
,
253 object::Archive::Kind Kind
, bool Thin
,
254 StringRef ArcName
, const NewArchiveMember
&M
,
255 sys::TimePoint
<std::chrono::seconds
> ModTime
,
259 return printBSDMemberHeader(Out
, Pos
, M
.MemberName
, ModTime
, M
.UID
, M
.GID
,
261 if (!useStringTable(Thin
, M
.MemberName
))
262 return printGNUSmallMemberHeader(Out
, M
.MemberName
, ModTime
, M
.UID
, M
.GID
,
265 uint64_t NamePos
= StringTable
.tell();
266 addToStringTable(StringTable
, ArcName
, M
, Thin
);
267 printWithSpacePadding(Out
, NamePos
, 15);
268 printRestOfMemberHeader(Out
, ModTime
, M
.UID
, M
.GID
, M
.Perms
, Size
);
273 std::vector
<unsigned> Symbols
;
280 static MemberData
computeStringTable(StringRef Names
) {
281 unsigned Size
= Names
.size();
282 unsigned Pad
= OffsetToAlignment(Size
, 2);
284 raw_string_ostream
Out(Header
);
285 printWithSpacePadding(Out
, "//", 48);
286 printWithSpacePadding(Out
, Size
+ Pad
, 10);
289 return {{}, std::move(Header
), Names
, Pad
? "\n" : ""};
292 static sys::TimePoint
<std::chrono::seconds
> now(bool Deterministic
) {
293 using namespace std::chrono
;
296 return time_point_cast
<seconds
>(system_clock::now());
297 return sys::TimePoint
<seconds
>();
300 static bool isArchiveSymbol(const object::BasicSymbolRef
&S
) {
301 uint32_t Symflags
= S
.getFlags();
302 if (Symflags
& object::SymbolRef::SF_FormatSpecific
)
304 if (!(Symflags
& object::SymbolRef::SF_Global
))
306 if (Symflags
& object::SymbolRef::SF_Undefined
)
311 static void printNBits(raw_ostream
&Out
, object::Archive::Kind Kind
,
313 if (is64BitKind(Kind
))
314 print
<uint64_t>(Out
, Kind
, Val
);
316 print
<uint32_t>(Out
, Kind
, Val
);
319 static void writeSymbolTable(raw_ostream
&Out
, object::Archive::Kind Kind
,
320 bool Deterministic
, ArrayRef
<MemberData
> Members
,
321 StringRef StringTable
) {
322 // We don't write a symbol table on an archive with no members -- except on
323 // Darwin, where the linker will abort unless the archive has a symbol table.
324 if (StringTable
.empty() && !isDarwin(Kind
))
327 unsigned NumSyms
= 0;
328 for (const MemberData
&M
: Members
)
329 NumSyms
+= M
.Symbols
.size();
332 unsigned OffsetSize
= is64BitKind(Kind
) ? sizeof(uint64_t) : sizeof(uint32_t);
334 Size
+= OffsetSize
; // Number of entries
336 Size
+= NumSyms
* OffsetSize
* 2; // Table
338 Size
+= NumSyms
* OffsetSize
; // Table
340 Size
+= OffsetSize
; // byte count
341 Size
+= StringTable
.size();
342 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
343 // least 4-byte aligned for 32-bit content. Opt for the larger encoding
345 // We do this for all bsd formats because it simplifies aligning members.
346 unsigned Alignment
= isBSDLike(Kind
) ? 8 : 2;
347 unsigned Pad
= OffsetToAlignment(Size
, Alignment
);
350 if (isBSDLike(Kind
)) {
351 const char *Name
= is64BitKind(Kind
) ? "__.SYMDEF_64" : "__.SYMDEF";
352 printBSDMemberHeader(Out
, Out
.tell(), Name
, now(Deterministic
), 0, 0, 0,
355 const char *Name
= is64BitKind(Kind
) ? "/SYM64" : "";
356 printGNUSmallMemberHeader(Out
, Name
, now(Deterministic
), 0, 0, 0, Size
);
359 uint64_t Pos
= Out
.tell() + Size
;
362 printNBits(Out
, Kind
, NumSyms
* 2 * OffsetSize
);
364 printNBits(Out
, Kind
, NumSyms
);
366 for (const MemberData
&M
: Members
) {
367 for (unsigned StringOffset
: M
.Symbols
) {
369 printNBits(Out
, Kind
, StringOffset
);
370 printNBits(Out
, Kind
, Pos
); // member offset
372 Pos
+= M
.Header
.size() + M
.Data
.size() + M
.Padding
.size();
376 // byte count of the string table
377 printNBits(Out
, Kind
, StringTable
.size());
381 Out
.write(uint8_t(0));
384 static Expected
<std::vector
<unsigned>>
385 getSymbols(MemoryBufferRef Buf
, raw_ostream
&SymNames
, bool &HasObject
) {
386 std::vector
<unsigned> Ret
;
388 // In the scenario when LLVMContext is populated SymbolicFile will contain a
389 // reference to it, thus SymbolicFile should be destroyed first.
391 std::unique_ptr
<object::SymbolicFile
> Obj
;
392 if (identify_magic(Buf
.getBuffer()) == file_magic::bitcode
) {
393 auto ObjOrErr
= object::SymbolicFile::createSymbolicFile(
394 Buf
, file_magic::bitcode
, &Context
);
396 // FIXME: check only for "not an object file" errors.
397 consumeError(ObjOrErr
.takeError());
400 Obj
= std::move(*ObjOrErr
);
402 auto ObjOrErr
= object::SymbolicFile::createSymbolicFile(Buf
);
404 // FIXME: check only for "not an object file" errors.
405 consumeError(ObjOrErr
.takeError());
408 Obj
= std::move(*ObjOrErr
);
412 for (const object::BasicSymbolRef
&S
: Obj
->symbols()) {
413 if (!isArchiveSymbol(S
))
415 Ret
.push_back(SymNames
.tell());
416 if (auto EC
= S
.printName(SymNames
))
417 return errorCodeToError(EC
);
423 static Expected
<std::vector
<MemberData
>>
424 computeMemberData(raw_ostream
&StringTable
, raw_ostream
&SymNames
,
425 object::Archive::Kind Kind
, bool Thin
, StringRef ArcName
,
426 bool Deterministic
, ArrayRef
<NewArchiveMember
> NewMembers
) {
427 static char PaddingData
[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
429 // This ignores the symbol table, but we only need the value mod 8 and the
430 // symbol table is aligned to be a multiple of 8 bytes
433 std::vector
<MemberData
> Ret
;
434 bool HasObject
= false;
436 // UniqueTimestamps is a special case to improve debugging on Darwin:
438 // The Darwin linker does not link debug info into the final
439 // binary. Instead, it emits entries of type N_OSO in in the output
440 // binary's symbol table, containing references to the linked-in
441 // object files. Using that reference, the debugger can read the
442 // debug data directly from the object files. Alternatively, an
443 // invocation of 'dsymutil' will link the debug data from the object
444 // files into a dSYM bundle, which can be loaded by the debugger,
445 // instead of the object files.
447 // For an object file, the N_OSO entries contain the absolute path
448 // path to the file, and the file's timestamp. For an object
449 // included in an archive, the path is formatted like
450 // "/absolute/path/to/archive.a(member.o)", and the timestamp is the
451 // archive member's timestamp, rather than the archive's timestamp.
453 // However, this doesn't always uniquely identify an object within
454 // an archive -- an archive file can have multiple entries with the
455 // same filename. (This will happen commonly if the original object
456 // files started in different directories.) The only way they get
457 // distinguished, then, is via the timestamp. But this process is
458 // unable to find the correct object file in the archive when there
459 // are two files of the same name and timestamp.
461 // Additionally, timestamp==0 is treated specially, and causes the
462 // timestamp to be ignored as a match criteria.
464 // That will "usually" work out okay when creating an archive not in
465 // deterministic timestamp mode, because the objects will probably
466 // have been created at different timestamps.
468 // To ameliorate this problem, in deterministic archive mode (which
469 // is the default), on Darwin we will emit a unique non-zero
470 // timestamp for each entry with a duplicated name. This is still
471 // deterministic: the only thing affecting that timestamp is the
472 // order of the files in the resultant archive.
474 // See also the functions that handle the lookup:
475 // in lldb: ObjectContainerBSDArchive::Archive::FindObject()
476 // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
477 bool UniqueTimestamps
= Deterministic
&& isDarwin(Kind
);
478 std::map
<StringRef
, unsigned> FilenameCount
;
479 if (UniqueTimestamps
) {
480 for (const NewArchiveMember
&M
: NewMembers
)
481 FilenameCount
[M
.MemberName
]++;
482 for (auto &Entry
: FilenameCount
)
483 Entry
.second
= Entry
.second
> 1 ? 1 : 0;
486 for (const NewArchiveMember
&M
: NewMembers
) {
488 raw_string_ostream
Out(Header
);
490 MemoryBufferRef Buf
= M
.Buf
->getMemBufferRef();
491 StringRef Data
= Thin
? "" : Buf
.getBuffer();
493 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
494 // least 4-byte aligned for 32-bit content. Opt for the larger encoding
495 // uniformly. This matches the behaviour with cctools and ensures that ld64
496 // is happy with archives that we generate.
497 unsigned MemberPadding
=
498 isDarwin(Kind
) ? OffsetToAlignment(Data
.size(), 8) : 0;
499 unsigned TailPadding
= OffsetToAlignment(Data
.size() + MemberPadding
, 2);
500 StringRef Padding
= StringRef(PaddingData
, MemberPadding
+ TailPadding
);
502 sys::TimePoint
<std::chrono::seconds
> ModTime
;
503 if (UniqueTimestamps
)
504 // Increment timestamp for each file of a given name.
505 ModTime
= sys::toTimePoint(FilenameCount
[M
.MemberName
]++);
508 printMemberHeader(Out
, Pos
, StringTable
, Kind
, Thin
, ArcName
, M
, ModTime
,
509 Buf
.getBufferSize() + MemberPadding
);
512 Expected
<std::vector
<unsigned>> Symbols
=
513 getSymbols(Buf
, SymNames
, HasObject
);
514 if (auto E
= Symbols
.takeError())
517 Pos
+= Header
.size() + Data
.size() + Padding
.size();
518 Ret
.push_back({std::move(*Symbols
), std::move(Header
), Data
, Padding
});
520 // If there are no symbols, emit an empty symbol table, to satisfy Solaris
521 // tools, older versions of which expect a symbol table in a non-empty
522 // archive, regardless of whether there are any symbols in it.
523 if (HasObject
&& SymNames
.tell() == 0)
524 SymNames
<< '\0' << '\0' << '\0';
528 Error
llvm::writeArchive(StringRef ArcName
,
529 ArrayRef
<NewArchiveMember
> NewMembers
,
530 bool WriteSymtab
, object::Archive::Kind Kind
,
531 bool Deterministic
, bool Thin
,
532 std::unique_ptr
<MemoryBuffer
> OldArchiveBuf
) {
533 assert((!Thin
|| !isBSDLike(Kind
)) && "Only the gnu format has a thin mode");
535 SmallString
<0> SymNamesBuf
;
536 raw_svector_ostream
SymNames(SymNamesBuf
);
537 SmallString
<0> StringTableBuf
;
538 raw_svector_ostream
StringTable(StringTableBuf
);
540 Expected
<std::vector
<MemberData
>> DataOrErr
= computeMemberData(
541 StringTable
, SymNames
, Kind
, Thin
, ArcName
, Deterministic
, NewMembers
);
542 if (Error E
= DataOrErr
.takeError())
544 std::vector
<MemberData
> &Data
= *DataOrErr
;
546 if (!StringTableBuf
.empty())
547 Data
.insert(Data
.begin(), computeStringTable(StringTableBuf
));
549 // We would like to detect if we need to switch to a 64-bit symbol table.
551 uint64_t MaxOffset
= 0;
552 uint64_t LastOffset
= MaxOffset
;
553 for (const auto &M
: Data
) {
554 // Record the start of the member's offset
555 LastOffset
= MaxOffset
;
556 // Account for the size of each part associated with the member.
557 MaxOffset
+= M
.Header
.size() + M
.Data
.size() + M
.Padding
.size();
558 // We assume 32-bit symbols to see if 32-bit symbols are possible or not.
559 MaxOffset
+= M
.Symbols
.size() * 4;
562 // The SYM64 format is used when an archive's member offsets are larger than
563 // 32-bits can hold. The need for this shift in format is detected by
564 // writeArchive. To test this we need to generate a file with a member that
565 // has an offset larger than 32-bits but this demands a very slow test. To
566 // speed the test up we use this environment variable to pretend like the
567 // cutoff happens before 32-bits and instead happens at some much smaller
569 const char *Sym64Env
= std::getenv("SYM64_THRESHOLD");
570 int Sym64Threshold
= 32;
572 StringRef(Sym64Env
).getAsInteger(10, Sym64Threshold
);
574 // If LastOffset isn't going to fit in a 32-bit varible we need to switch
575 // to 64-bit. Note that the file can be larger than 4GB as long as the last
576 // member starts before the 4GB offset.
577 if (LastOffset
>= (1ULL << Sym64Threshold
)) {
578 if (Kind
== object::Archive::K_DARWIN
)
579 Kind
= object::Archive::K_DARWIN64
;
581 Kind
= object::Archive::K_GNU64
;
585 Expected
<sys::fs::TempFile
> Temp
=
586 sys::fs::TempFile::create(ArcName
+ ".temp-archive-%%%%%%%.a");
588 return Temp
.takeError();
590 raw_fd_ostream
Out(Temp
->FD
, false);
597 writeSymbolTable(Out
, Kind
, Deterministic
, Data
, SymNamesBuf
);
599 for (const MemberData
&M
: Data
)
600 Out
<< M
.Header
<< M
.Data
<< M
.Padding
;
604 // At this point, we no longer need whatever backing memory
605 // was used to generate the NewMembers. On Windows, this buffer
606 // could be a mapped view of the file we want to replace (if
607 // we're updating an existing archive, say). In that case, the
608 // rename would still succeed, but it would leave behind a
609 // temporary file (actually the original file renamed) because
610 // a file cannot be deleted while there's a handle open on it,
611 // only renamed. So by freeing this buffer, this ensures that
612 // the last open handle on the destination file, if any, is
613 // closed before we attempt to rename.
614 OldArchiveBuf
.reset();
616 return Temp
->keep(ArcName
);