1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the writeArchive function.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Object/ArchiveWriter.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/BinaryFormat/Magic.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/Object/Archive.h"
19 #include "llvm/Object/ObjectFile.h"
20 #include "llvm/Object/SymbolicFile.h"
21 #include "llvm/Support/EndianStream.h"
22 #include "llvm/Support/Errc.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/Format.h"
25 #include "llvm/Support/Path.h"
26 #include "llvm/Support/ToolOutputFile.h"
27 #include "llvm/Support/raw_ostream.h"
31 #if !defined(_MSC_VER) && !defined(__MINGW32__)
39 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef
)
40 : Buf(MemoryBuffer::getMemBuffer(BufRef
, false)),
41 MemberName(BufRef
.getBufferIdentifier()) {}
43 Expected
<NewArchiveMember
>
44 NewArchiveMember::getOldMember(const object::Archive::Child
&OldMember
,
46 Expected
<llvm::MemoryBufferRef
> BufOrErr
= OldMember
.getMemoryBufferRef();
48 return BufOrErr
.takeError();
51 M
.Buf
= MemoryBuffer::getMemBuffer(*BufOrErr
, false);
52 M
.MemberName
= M
.Buf
->getBufferIdentifier();
54 auto ModTimeOrErr
= OldMember
.getLastModified();
56 return ModTimeOrErr
.takeError();
57 M
.ModTime
= ModTimeOrErr
.get();
58 Expected
<unsigned> UIDOrErr
= OldMember
.getUID();
60 return UIDOrErr
.takeError();
61 M
.UID
= UIDOrErr
.get();
62 Expected
<unsigned> GIDOrErr
= OldMember
.getGID();
64 return GIDOrErr
.takeError();
65 M
.GID
= GIDOrErr
.get();
66 Expected
<sys::fs::perms
> AccessModeOrErr
= OldMember
.getAccessMode();
68 return AccessModeOrErr
.takeError();
69 M
.Perms
= AccessModeOrErr
.get();
74 Expected
<NewArchiveMember
> NewArchiveMember::getFile(StringRef FileName
,
76 sys::fs::file_status Status
;
78 if (auto EC
= sys::fs::openFileForRead(FileName
, FD
))
79 return errorCodeToError(EC
);
82 if (auto EC
= sys::fs::status(FD
, Status
))
83 return errorCodeToError(EC
);
85 // Opening a directory doesn't make sense. Let it fail.
86 // Linux cannot open directories with open(2), although
87 // cygwin and *bsd can.
88 if (Status
.type() == sys::fs::file_type::directory_file
)
89 return errorCodeToError(make_error_code(errc::is_a_directory
));
91 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MemberBufferOrErr
=
92 MemoryBuffer::getOpenFile(FD
, FileName
, Status
.getSize(), false);
93 if (!MemberBufferOrErr
)
94 return errorCodeToError(MemberBufferOrErr
.getError());
97 return errorCodeToError(std::error_code(errno
, std::generic_category()));
100 M
.Buf
= std::move(*MemberBufferOrErr
);
101 M
.MemberName
= M
.Buf
->getBufferIdentifier();
102 if (!Deterministic
) {
103 M
.ModTime
= std::chrono::time_point_cast
<std::chrono::seconds
>(
104 Status
.getLastModificationTime());
105 M
.UID
= Status
.getUser();
106 M
.GID
= Status
.getGroup();
107 M
.Perms
= Status
.permissions();
112 template <typename T
>
113 static void printWithSpacePadding(raw_ostream
&OS
, T Data
, unsigned Size
) {
114 uint64_t OldPos
= OS
.tell();
116 unsigned SizeSoFar
= OS
.tell() - OldPos
;
117 assert(SizeSoFar
<= Size
&& "Data doesn't fit in Size");
118 OS
.indent(Size
- SizeSoFar
);
121 static bool isDarwin(object::Archive::Kind Kind
) {
122 return Kind
== object::Archive::K_DARWIN
||
123 Kind
== object::Archive::K_DARWIN64
;
126 static bool isBSDLike(object::Archive::Kind Kind
) {
128 case object::Archive::K_GNU
:
129 case object::Archive::K_GNU64
:
131 case object::Archive::K_BSD
:
132 case object::Archive::K_DARWIN
:
133 case object::Archive::K_DARWIN64
:
135 case object::Archive::K_COFF
:
138 llvm_unreachable("not supported for writting");
142 static void print(raw_ostream
&Out
, object::Archive::Kind Kind
, T Val
) {
143 support::endian::write(Out
, Val
,
144 isBSDLike(Kind
) ? support::little
: support::big
);
147 static void printRestOfMemberHeader(
148 raw_ostream
&Out
, const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
149 unsigned UID
, unsigned GID
, unsigned Perms
, unsigned Size
) {
150 printWithSpacePadding(Out
, sys::toTimeT(ModTime
), 12);
152 // The format has only 6 chars for uid and gid. Truncate if the provided values don't fit.
154 printWithSpacePadding(Out
, UID
% 1000000, 6);
155 printWithSpacePadding(Out
, GID
% 1000000, 6);
157 printWithSpacePadding(Out
, format("%o", Perms
), 8);
158 printWithSpacePadding(Out
, Size
, 10);
163 printGNUSmallMemberHeader(raw_ostream
&Out
, StringRef Name
,
164 const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
165 unsigned UID
, unsigned GID
, unsigned Perms
,
167 printWithSpacePadding(Out
, Twine(Name
) + "/", 16);
168 printRestOfMemberHeader(Out
, ModTime
, UID
, GID
, Perms
, Size
);
172 printBSDMemberHeader(raw_ostream
&Out
, uint64_t Pos
, StringRef Name
,
173 const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
174 unsigned UID
, unsigned GID
, unsigned Perms
,
176 uint64_t PosAfterHeader
= Pos
+ 60 + Name
.size();
177 // Pad so that even 64 bit object files are aligned.
178 unsigned Pad
= OffsetToAlignment(PosAfterHeader
, 8);
179 unsigned NameWithPadding
= Name
.size() + Pad
;
180 printWithSpacePadding(Out
, Twine("#1/") + Twine(NameWithPadding
), 16);
181 printRestOfMemberHeader(Out
, ModTime
, UID
, GID
, Perms
,
182 NameWithPadding
+ Size
);
185 Out
.write(uint8_t(0));
188 static bool useStringTable(bool Thin
, StringRef Name
) {
189 return Thin
|| Name
.size() >= 16 || Name
.contains('/');
192 static bool is64BitKind(object::Archive::Kind Kind
) {
194 case object::Archive::K_GNU
:
195 case object::Archive::K_BSD
:
196 case object::Archive::K_DARWIN
:
197 case object::Archive::K_COFF
:
199 case object::Archive::K_DARWIN64
:
200 case object::Archive::K_GNU64
:
203 llvm_unreachable("not supported for writting");
207 printMemberHeader(raw_ostream
&Out
, uint64_t Pos
, raw_ostream
&StringTable
,
208 StringMap
<uint64_t> &MemberNames
, object::Archive::Kind Kind
,
209 bool Thin
, const NewArchiveMember
&M
,
210 sys::TimePoint
<std::chrono::seconds
> ModTime
, unsigned Size
) {
212 return printBSDMemberHeader(Out
, Pos
, M
.MemberName
, ModTime
, M
.UID
, M
.GID
,
214 if (!useStringTable(Thin
, M
.MemberName
))
215 return printGNUSmallMemberHeader(Out
, M
.MemberName
, ModTime
, M
.UID
, M
.GID
,
220 NamePos
= StringTable
.tell();
221 StringTable
<< M
.MemberName
<< "/\n";
223 auto Insertion
= MemberNames
.insert({M
.MemberName
, uint64_t(0)});
224 if (Insertion
.second
) {
225 Insertion
.first
->second
= StringTable
.tell();
226 StringTable
<< M
.MemberName
<< "/\n";
228 NamePos
= Insertion
.first
->second
;
230 printWithSpacePadding(Out
, NamePos
, 15);
231 printRestOfMemberHeader(Out
, ModTime
, M
.UID
, M
.GID
, M
.Perms
, Size
);
236 std::vector
<unsigned> Symbols
;
243 static MemberData
computeStringTable(StringRef Names
) {
244 unsigned Size
= Names
.size();
245 unsigned Pad
= OffsetToAlignment(Size
, 2);
247 raw_string_ostream
Out(Header
);
248 printWithSpacePadding(Out
, "//", 48);
249 printWithSpacePadding(Out
, Size
+ Pad
, 10);
252 return {{}, std::move(Header
), Names
, Pad
? "\n" : ""};
255 static sys::TimePoint
<std::chrono::seconds
> now(bool Deterministic
) {
256 using namespace std::chrono
;
259 return time_point_cast
<seconds
>(system_clock::now());
260 return sys::TimePoint
<seconds
>();
263 static bool isArchiveSymbol(const object::BasicSymbolRef
&S
) {
264 uint32_t Symflags
= S
.getFlags();
265 if (Symflags
& object::SymbolRef::SF_FormatSpecific
)
267 if (!(Symflags
& object::SymbolRef::SF_Global
))
269 if (Symflags
& object::SymbolRef::SF_Undefined
)
274 static void printNBits(raw_ostream
&Out
, object::Archive::Kind Kind
,
276 if (is64BitKind(Kind
))
277 print
<uint64_t>(Out
, Kind
, Val
);
279 print
<uint32_t>(Out
, Kind
, Val
);
282 static void writeSymbolTable(raw_ostream
&Out
, object::Archive::Kind Kind
,
283 bool Deterministic
, ArrayRef
<MemberData
> Members
,
284 StringRef StringTable
) {
285 // We don't write a symbol table on an archive with no members -- except on
286 // Darwin, where the linker will abort unless the archive has a symbol table.
287 if (StringTable
.empty() && !isDarwin(Kind
))
290 unsigned NumSyms
= 0;
291 for (const MemberData
&M
: Members
)
292 NumSyms
+= M
.Symbols
.size();
295 unsigned OffsetSize
= is64BitKind(Kind
) ? sizeof(uint64_t) : sizeof(uint32_t);
297 Size
+= OffsetSize
; // Number of entries
299 Size
+= NumSyms
* OffsetSize
* 2; // Table
301 Size
+= NumSyms
* OffsetSize
; // Table
303 Size
+= OffsetSize
; // byte count
304 Size
+= StringTable
.size();
305 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
306 // least 4-byte aligned for 32-bit content. Opt for the larger encoding uniformly.
308 // We do this for all bsd formats because it simplifies aligning members.
309 unsigned Alignment
= isBSDLike(Kind
) ? 8 : 2;
310 unsigned Pad
= OffsetToAlignment(Size
, Alignment
);
313 if (isBSDLike(Kind
)) {
314 const char *Name
= is64BitKind(Kind
) ? "__.SYMDEF_64" : "__.SYMDEF";
315 printBSDMemberHeader(Out
, Out
.tell(), Name
, now(Deterministic
), 0, 0, 0,
318 const char *Name
= is64BitKind(Kind
) ? "/SYM64" : "";
319 printGNUSmallMemberHeader(Out
, Name
, now(Deterministic
), 0, 0, 0, Size
);
322 uint64_t Pos
= Out
.tell() + Size
;
325 printNBits(Out
, Kind
, NumSyms
* 2 * OffsetSize
);
327 printNBits(Out
, Kind
, NumSyms
);
329 for (const MemberData
&M
: Members
) {
330 for (unsigned StringOffset
: M
.Symbols
) {
332 printNBits(Out
, Kind
, StringOffset
);
333 printNBits(Out
, Kind
, Pos
); // member offset
335 Pos
+= M
.Header
.size() + M
.Data
.size() + M
.Padding
.size();
339 // byte count of the string table
340 printNBits(Out
, Kind
, StringTable
.size());
344 Out
.write(uint8_t(0));
347 static Expected
<std::vector
<unsigned>>
348 getSymbols(MemoryBufferRef Buf
, raw_ostream
&SymNames
, bool &HasObject
) {
349 std::vector
<unsigned> Ret
;
351 // In the scenario when LLVMContext is populated SymbolicFile will contain a
352 // reference to it, thus SymbolicFile should be destroyed first.
354 std::unique_ptr
<object::SymbolicFile
> Obj
;
355 if (identify_magic(Buf
.getBuffer()) == file_magic::bitcode
) {
356 auto ObjOrErr
= object::SymbolicFile::createSymbolicFile(
357 Buf
, file_magic::bitcode
, &Context
);
359 // FIXME: check only for "not an object file" errors.
360 consumeError(ObjOrErr
.takeError());
363 Obj
= std::move(*ObjOrErr
);
365 auto ObjOrErr
= object::SymbolicFile::createSymbolicFile(Buf
);
367 // FIXME: check only for "not an object file" errors.
368 consumeError(ObjOrErr
.takeError());
371 Obj
= std::move(*ObjOrErr
);
375 for (const object::BasicSymbolRef
&S
: Obj
->symbols()) {
376 if (!isArchiveSymbol(S
))
378 Ret
.push_back(SymNames
.tell());
379 if (auto EC
= S
.printName(SymNames
))
380 return errorCodeToError(EC
);
386 static Expected
<std::vector
<MemberData
>>
387 computeMemberData(raw_ostream
&StringTable
, raw_ostream
&SymNames
,
388 object::Archive::Kind Kind
, bool Thin
, bool Deterministic
,
389 ArrayRef
<NewArchiveMember
> NewMembers
) {
390 static char PaddingData
[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
392 // This ignores the symbol table, but we only need the value mod 8 and the
393 // symbol table is aligned to be a multiple of 8 bytes.
396 std::vector
<MemberData
> Ret
;
397 bool HasObject
= false;
399 // Deduplicate long member names in the string table and reuse earlier name
400 // offsets. This especially saves space for COFF Import libraries where all
401 // members have the same name.
402 StringMap
<uint64_t> MemberNames
;
404 // UniqueTimestamps is a special case to improve debugging on Darwin:
406 // The Darwin linker does not link debug info into the final
407 // binary. Instead, it emits entries of type N_OSO in the output
408 // binary's symbol table, containing references to the linked-in
409 // object files. Using that reference, the debugger can read the
410 // debug data directly from the object files. Alternatively, an
411 // invocation of 'dsymutil' will link the debug data from the object
412 // files into a dSYM bundle, which can be loaded by the debugger,
413 // instead of the object files.
415 // For an object file, the N_OSO entries contain the absolute
416 // path to the file, and the file's timestamp. For an object
417 // included in an archive, the path is formatted like
418 // "/absolute/path/to/archive.a(member.o)", and the timestamp is the
419 // archive member's timestamp, rather than the archive's timestamp.
421 // However, this doesn't always uniquely identify an object within
422 // an archive -- an archive file can have multiple entries with the
423 // same filename. (This will happen commonly if the original object
424 // files started in different directories.) The only way they get
425 // distinguished, then, is via the timestamp. But this process is
426 // unable to find the correct object file in the archive when there
427 // are two files of the same name and timestamp.
429 // Additionally, timestamp==0 is treated specially, and causes the
430 // timestamp to be ignored as a match criterion.
432 // That will "usually" work out okay when creating an archive not in
433 // deterministic timestamp mode, because the objects will probably
434 // have been created at different timestamps.
436 // To ameliorate this problem, in deterministic archive mode (which
437 // is the default), on Darwin we will emit a unique non-zero
438 // timestamp for each entry with a duplicated name. This is still
439 // deterministic: the only thing affecting that timestamp is the
440 // order of the files in the resultant archive.
442 // See also the functions that handle the lookup:
443 // in lldb: ObjectContainerBSDArchive::Archive::FindObject()
444 // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
445 bool UniqueTimestamps
= Deterministic
&& isDarwin(Kind
);
446 std::map
<StringRef
, unsigned> FilenameCount
;
447 if (UniqueTimestamps
) {
448 for (const NewArchiveMember
&M
: NewMembers
)
449 FilenameCount
[M
.MemberName
]++;
450 for (auto &Entry
: FilenameCount
)
451 Entry
.second
= Entry
.second
> 1 ? 1 : 0;
454 for (const NewArchiveMember
&M
: NewMembers
) {
456 raw_string_ostream
Out(Header
);
458 MemoryBufferRef Buf
= M
.Buf
->getMemBufferRef();
459 StringRef Data
= Thin
? "" : Buf
.getBuffer();
461 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
462 // least 4-byte aligned for 32-bit content. Opt for the larger encoding
463 // uniformly. This matches the behaviour with cctools and ensures that ld64
464 // is happy with archives that we generate.
465 unsigned MemberPadding
=
466 isDarwin(Kind
) ? OffsetToAlignment(Data
.size(), 8) : 0;
467 unsigned TailPadding
= OffsetToAlignment(Data
.size() + MemberPadding
, 2);
468 StringRef Padding
= StringRef(PaddingData
, MemberPadding
+ TailPadding
);
470 sys::TimePoint
<std::chrono::seconds
> ModTime
;
471 if (UniqueTimestamps
)
472 // Increment timestamp for each file of a given name.
473 ModTime
= sys::toTimePoint(FilenameCount
[M
.MemberName
]++);
476 printMemberHeader(Out
, Pos
, StringTable
, MemberNames
, Kind
, Thin
, M
,
477 ModTime
, Buf
.getBufferSize() + MemberPadding
);
480 Expected
<std::vector
<unsigned>> Symbols
=
481 getSymbols(Buf
, SymNames
, HasObject
);
482 if (auto E
= Symbols
.takeError())
485 Pos
+= Header
.size() + Data
.size() + Padding
.size();
486 Ret
.push_back({std::move(*Symbols
), std::move(Header
), Data
, Padding
});
488 // If there are no symbols, emit an empty symbol table, to satisfy Solaris
489 // tools, older versions of which expect a symbol table in a non-empty
490 // archive, regardless of whether there are any symbols in it.
491 if (HasObject
&& SymNames
.tell() == 0)
492 SymNames
<< '\0' << '\0' << '\0';
497 // Compute the relative path from From to To.
498 std::string
computeArchiveRelativePath(StringRef From
, StringRef To
) {
499 if (sys::path::is_absolute(From
) || sys::path::is_absolute(To
))
502 StringRef DirFrom
= sys::path::parent_path(From
);
503 auto FromI
= sys::path::begin(DirFrom
);
504 auto ToI
= sys::path::begin(To
);
505 while (*FromI
== *ToI
) {
510 SmallString
<128> Relative
;
511 for (auto FromE
= sys::path::end(DirFrom
); FromI
!= FromE
; ++FromI
)
512 sys::path::append(Relative
, "..");
514 for (auto ToE
= sys::path::end(To
); ToI
!= ToE
; ++ToI
)
515 sys::path::append(Relative
, *ToI
);
518 // Replace backslashes with slashes so that the path is portable between *nix and Windows.
520 std::replace(Relative
.begin(), Relative
.end(), '\\', '/');
523 return Relative
.str();
526 Error
writeArchive(StringRef ArcName
, ArrayRef
<NewArchiveMember
> NewMembers
,
527 bool WriteSymtab
, object::Archive::Kind Kind
,
528 bool Deterministic
, bool Thin
,
529 std::unique_ptr
<MemoryBuffer
> OldArchiveBuf
) {
530 assert((!Thin
|| !isBSDLike(Kind
)) && "Only the gnu format has a thin mode");
532 SmallString
<0> SymNamesBuf
;
533 raw_svector_ostream
SymNames(SymNamesBuf
);
534 SmallString
<0> StringTableBuf
;
535 raw_svector_ostream
StringTable(StringTableBuf
);
537 Expected
<std::vector
<MemberData
>> DataOrErr
= computeMemberData(
538 StringTable
, SymNames
, Kind
, Thin
, Deterministic
, NewMembers
);
539 if (Error E
= DataOrErr
.takeError())
541 std::vector
<MemberData
> &Data
= *DataOrErr
;
543 if (!StringTableBuf
.empty())
544 Data
.insert(Data
.begin(), computeStringTable(StringTableBuf
));
546 // We would like to detect if we need to switch to a 64-bit symbol table.
548 uint64_t MaxOffset
= 0;
549 uint64_t LastOffset
= MaxOffset
;
550 for (const auto &M
: Data
) {
551 // Record the start of the member's offset
552 LastOffset
= MaxOffset
;
553 // Account for the size of each part associated with the member.
554 MaxOffset
+= M
.Header
.size() + M
.Data
.size() + M
.Padding
.size();
555 // We assume 32-bit symbols to see if 32-bit symbols are possible or not.
556 MaxOffset
+= M
.Symbols
.size() * 4;
559 // The SYM64 format is used when an archive's member offsets are larger than
560 // 32-bits can hold. The need for this shift in format is detected by
561 // writeArchive. To test this we need to generate a file with a member that
562 // has an offset larger than 32-bits but this demands a very slow test. To
563 // speed the test up we use this environment variable to pretend like the
564 // cutoff happens before 32-bits and instead happens at some much smaller value.
566 const char *Sym64Env
= std::getenv("SYM64_THRESHOLD");
567 int Sym64Threshold
= 32;
569 StringRef(Sym64Env
).getAsInteger(10, Sym64Threshold
);
571 // If LastOffset isn't going to fit in a 32-bit variable we need to switch
572 // to 64-bit. Note that the file can be larger than 4GB as long as the last
573 // member starts before the 4GB offset.
574 if (LastOffset
>= (1ULL << Sym64Threshold
)) {
575 if (Kind
== object::Archive::K_DARWIN
)
576 Kind
= object::Archive::K_DARWIN64
;
578 Kind
= object::Archive::K_GNU64
;
582 Expected
<sys::fs::TempFile
> Temp
=
583 sys::fs::TempFile::create(ArcName
+ ".temp-archive-%%%%%%%.a");
585 return Temp
.takeError();
587 raw_fd_ostream
Out(Temp
->FD
, false);
594 writeSymbolTable(Out
, Kind
, Deterministic
, Data
, SymNamesBuf
);
596 for (const MemberData
&M
: Data
)
597 Out
<< M
.Header
<< M
.Data
<< M
.Padding
;
601 // At this point, we no longer need whatever backing memory
602 // was used to generate the NewMembers. On Windows, this buffer
603 // could be a mapped view of the file we want to replace (if
604 // we're updating an existing archive, say). In that case, the
605 // rename would still succeed, but it would leave behind a
606 // temporary file (actually the original file renamed) because
607 // a file cannot be deleted while there's a handle open on it,
608 // only renamed. So by freeing this buffer, this ensures that
609 // the last open handle on the destination file, if any, is
610 // closed before we attempt to rename.
611 OldArchiveBuf
.reset();
613 return Temp
->keep(ArcName
);