1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the writeArchive function.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Object/ArchiveWriter.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/BinaryFormat/Magic.h"
18 #include "llvm/IR/LLVMContext.h"
19 #include "llvm/Object/Archive.h"
20 #include "llvm/Object/ObjectFile.h"
21 #include "llvm/Object/SymbolicFile.h"
22 #include "llvm/Support/EndianStream.h"
23 #include "llvm/Support/Errc.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/Format.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/ToolOutputFile.h"
28 #include "llvm/Support/raw_ostream.h"
30 #if !defined(_MSC_VER) && !defined(__MINGW32__)
38 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef
)
39 : Buf(MemoryBuffer::getMemBuffer(BufRef
, false)),
40 MemberName(BufRef
.getBufferIdentifier()) {}
42 Expected
<NewArchiveMember
>
43 NewArchiveMember::getOldMember(const object::Archive::Child
&OldMember
,
45 Expected
<llvm::MemoryBufferRef
> BufOrErr
= OldMember
.getMemoryBufferRef();
47 return BufOrErr
.takeError();
50 assert(M
.IsNew
== false);
51 M
.Buf
= MemoryBuffer::getMemBuffer(*BufOrErr
, false);
52 M
.MemberName
= M
.Buf
->getBufferIdentifier();
54 auto ModTimeOrErr
= OldMember
.getLastModified();
56 return ModTimeOrErr
.takeError();
57 M
.ModTime
= ModTimeOrErr
.get();
58 Expected
<unsigned> UIDOrErr
= OldMember
.getUID();
60 return UIDOrErr
.takeError();
61 M
.UID
= UIDOrErr
.get();
62 Expected
<unsigned> GIDOrErr
= OldMember
.getGID();
64 return GIDOrErr
.takeError();
65 M
.GID
= GIDOrErr
.get();
66 Expected
<sys::fs::perms
> AccessModeOrErr
= OldMember
.getAccessMode();
68 return AccessModeOrErr
.takeError();
69 M
.Perms
= AccessModeOrErr
.get();
74 Expected
<NewArchiveMember
> NewArchiveMember::getFile(StringRef FileName
,
76 sys::fs::file_status Status
;
78 if (auto EC
= sys::fs::openFileForRead(FileName
, FD
))
79 return errorCodeToError(EC
);
82 if (auto EC
= sys::fs::status(FD
, Status
))
83 return errorCodeToError(EC
);
85 // Opening a directory doesn't make sense. Let it fail.
86 // Linux cannot open directories with open(2), although
87 // cygwin and *bsd can.
88 if (Status
.type() == sys::fs::file_type::directory_file
)
89 return errorCodeToError(make_error_code(errc::is_a_directory
));
91 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MemberBufferOrErr
=
92 MemoryBuffer::getOpenFile(FD
, FileName
, Status
.getSize(), false);
93 if (!MemberBufferOrErr
)
94 return errorCodeToError(MemberBufferOrErr
.getError());
97 return errorCodeToError(std::error_code(errno
, std::generic_category()));
101 M
.Buf
= std::move(*MemberBufferOrErr
);
102 M
.MemberName
= M
.Buf
->getBufferIdentifier();
103 if (!Deterministic
) {
104 M
.ModTime
= std::chrono::time_point_cast
<std::chrono::seconds
>(
105 Status
.getLastModificationTime());
106 M
.UID
= Status
.getUser();
107 M
.GID
= Status
.getGroup();
108 M
.Perms
= Status
.permissions();
113 template <typename T
>
114 static void printWithSpacePadding(raw_ostream
&OS
, T Data
, unsigned Size
) {
115 uint64_t OldPos
= OS
.tell();
117 unsigned SizeSoFar
= OS
.tell() - OldPos
;
118 assert(SizeSoFar
<= Size
&& "Data doesn't fit in Size");
119 OS
.indent(Size
- SizeSoFar
);
122 static bool isBSDLike(object::Archive::Kind Kind
) {
124 case object::Archive::K_GNU
:
125 case object::Archive::K_GNU64
:
127 case object::Archive::K_BSD
:
128 case object::Archive::K_DARWIN
:
130 case object::Archive::K_DARWIN64
:
131 case object::Archive::K_COFF
:
134 llvm_unreachable("not supported for writting");
138 static void print(raw_ostream
&Out
, object::Archive::Kind Kind
, T Val
) {
139 support::endian::write(Out
, Val
,
140 isBSDLike(Kind
) ? support::little
: support::big
);
143 static void printRestOfMemberHeader(
144 raw_ostream
&Out
, const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
145 unsigned UID
, unsigned GID
, unsigned Perms
, unsigned Size
) {
146 printWithSpacePadding(Out
, sys::toTimeT(ModTime
), 12);
148 // The format has only 6 chars for uid and gid. Truncate if the provided
150 printWithSpacePadding(Out
, UID
% 1000000, 6);
151 printWithSpacePadding(Out
, GID
% 1000000, 6);
153 printWithSpacePadding(Out
, format("%o", Perms
), 8);
154 printWithSpacePadding(Out
, Size
, 10);
159 printGNUSmallMemberHeader(raw_ostream
&Out
, StringRef Name
,
160 const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
161 unsigned UID
, unsigned GID
, unsigned Perms
,
163 printWithSpacePadding(Out
, Twine(Name
) + "/", 16);
164 printRestOfMemberHeader(Out
, ModTime
, UID
, GID
, Perms
, Size
);
168 printBSDMemberHeader(raw_ostream
&Out
, uint64_t Pos
, StringRef Name
,
169 const sys::TimePoint
<std::chrono::seconds
> &ModTime
,
170 unsigned UID
, unsigned GID
, unsigned Perms
,
172 uint64_t PosAfterHeader
= Pos
+ 60 + Name
.size();
173 // Pad so that even 64 bit object files are aligned.
174 unsigned Pad
= OffsetToAlignment(PosAfterHeader
, 8);
175 unsigned NameWithPadding
= Name
.size() + Pad
;
176 printWithSpacePadding(Out
, Twine("#1/") + Twine(NameWithPadding
), 16);
177 printRestOfMemberHeader(Out
, ModTime
, UID
, GID
, Perms
,
178 NameWithPadding
+ Size
);
181 Out
.write(uint8_t(0));
184 static bool useStringTable(bool Thin
, StringRef Name
) {
185 return Thin
|| Name
.size() >= 16 || Name
.contains('/');
188 // Compute the relative path from From to To.
189 static std::string
computeRelativePath(StringRef From
, StringRef To
) {
190 if (sys::path::is_absolute(From
) || sys::path::is_absolute(To
))
193 StringRef DirFrom
= sys::path::parent_path(From
);
194 auto FromI
= sys::path::begin(DirFrom
);
195 auto ToI
= sys::path::begin(To
);
196 while (*FromI
== *ToI
) {
201 SmallString
<128> Relative
;
202 for (auto FromE
= sys::path::end(DirFrom
); FromI
!= FromE
; ++FromI
)
203 sys::path::append(Relative
, "..");
205 for (auto ToE
= sys::path::end(To
); ToI
!= ToE
; ++ToI
)
206 sys::path::append(Relative
, *ToI
);
209 // Replace backslashes with slashes so that the path is portable between *nix
211 std::replace(Relative
.begin(), Relative
.end(), '\\', '/');
214 return Relative
.str();
217 static bool is64BitKind(object::Archive::Kind Kind
) {
219 case object::Archive::K_GNU
:
220 case object::Archive::K_BSD
:
221 case object::Archive::K_DARWIN
:
222 case object::Archive::K_COFF
:
224 case object::Archive::K_DARWIN64
:
225 case object::Archive::K_GNU64
:
228 llvm_unreachable("not supported for writting");
231 static void addToStringTable(raw_ostream
&Out
, StringRef ArcName
,
232 const NewArchiveMember
&M
, bool Thin
) {
233 StringRef ID
= M
.Buf
->getBufferIdentifier();
236 Out
<< computeRelativePath(ArcName
, ID
);
244 static void printMemberHeader(raw_ostream
&Out
, uint64_t Pos
,
245 raw_ostream
&StringTable
,
246 object::Archive::Kind Kind
, bool Thin
,
247 StringRef ArcName
, const NewArchiveMember
&M
,
250 return printBSDMemberHeader(Out
, Pos
, M
.MemberName
, M
.ModTime
, M
.UID
, M
.GID
,
252 if (!useStringTable(Thin
, M
.MemberName
))
253 return printGNUSmallMemberHeader(Out
, M
.MemberName
, M
.ModTime
, M
.UID
, M
.GID
,
256 uint64_t NamePos
= StringTable
.tell();
257 addToStringTable(StringTable
, ArcName
, M
, Thin
);
258 printWithSpacePadding(Out
, NamePos
, 15);
259 printRestOfMemberHeader(Out
, M
.ModTime
, M
.UID
, M
.GID
, M
.Perms
, Size
);
264 std::vector
<unsigned> Symbols
;
271 static MemberData
computeStringTable(StringRef Names
) {
272 unsigned Size
= Names
.size();
273 unsigned Pad
= OffsetToAlignment(Size
, 2);
275 raw_string_ostream
Out(Header
);
276 printWithSpacePadding(Out
, "//", 48);
277 printWithSpacePadding(Out
, Size
+ Pad
, 10);
280 return {{}, std::move(Header
), Names
, Pad
? "\n" : ""};
283 static sys::TimePoint
<std::chrono::seconds
> now(bool Deterministic
) {
284 using namespace std::chrono
;
287 return time_point_cast
<seconds
>(system_clock::now());
288 return sys::TimePoint
<seconds
>();
291 static bool isArchiveSymbol(const object::BasicSymbolRef
&S
) {
292 uint32_t Symflags
= S
.getFlags();
293 if (Symflags
& object::SymbolRef::SF_FormatSpecific
)
295 if (!(Symflags
& object::SymbolRef::SF_Global
))
297 if (Symflags
& object::SymbolRef::SF_Undefined
)
302 static void printNBits(raw_ostream
&Out
, object::Archive::Kind Kind
,
304 if (is64BitKind(Kind
))
305 print
<uint64_t>(Out
, Kind
, Val
);
307 print
<uint32_t>(Out
, Kind
, Val
);
310 static void writeSymbolTable(raw_ostream
&Out
, object::Archive::Kind Kind
,
311 bool Deterministic
, ArrayRef
<MemberData
> Members
,
312 StringRef StringTable
) {
313 if (StringTable
.empty())
316 unsigned NumSyms
= 0;
317 for (const MemberData
&M
: Members
)
318 NumSyms
+= M
.Symbols
.size();
321 Size
+= is64BitKind(Kind
) ? 8 : 4; // Number of entries
323 Size
+= NumSyms
* 8; // Table
324 else if (is64BitKind(Kind
))
325 Size
+= NumSyms
* 8; // Table
327 Size
+= NumSyms
* 4; // Table
329 Size
+= 4; // byte count
330 Size
+= StringTable
.size();
331 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
332 // least 4-byte aligned for 32-bit content. Opt for the larger encoding
334 // We do this for all bsd formats because it simplifies aligning members.
335 unsigned Alignment
= isBSDLike(Kind
) ? 8 : 2;
336 unsigned Pad
= OffsetToAlignment(Size
, Alignment
);
340 printBSDMemberHeader(Out
, Out
.tell(), "__.SYMDEF", now(Deterministic
), 0, 0,
342 else if (is64BitKind(Kind
))
343 printGNUSmallMemberHeader(Out
, "/SYM64", now(Deterministic
), 0, 0, 0, Size
);
345 printGNUSmallMemberHeader(Out
, "", now(Deterministic
), 0, 0, 0, Size
);
347 uint64_t Pos
= Out
.tell() + Size
;
350 print
<uint32_t>(Out
, Kind
, NumSyms
* 8);
352 printNBits(Out
, Kind
, NumSyms
);
354 for (const MemberData
&M
: Members
) {
355 for (unsigned StringOffset
: M
.Symbols
) {
357 print
<uint32_t>(Out
, Kind
, StringOffset
);
358 printNBits(Out
, Kind
, Pos
); // member offset
360 Pos
+= M
.Header
.size() + M
.Data
.size() + M
.Padding
.size();
364 // byte count of the string table
365 print
<uint32_t>(Out
, Kind
, StringTable
.size());
369 Out
.write(uint8_t(0));
372 static Expected
<std::vector
<unsigned>>
373 getSymbols(MemoryBufferRef Buf
, raw_ostream
&SymNames
, bool &HasObject
) {
374 std::vector
<unsigned> Ret
;
376 // In the scenario when LLVMContext is populated SymbolicFile will contain a
377 // reference to it, thus SymbolicFile should be destroyed first.
379 std::unique_ptr
<object::SymbolicFile
> Obj
;
380 if (identify_magic(Buf
.getBuffer()) == file_magic::bitcode
) {
381 auto ObjOrErr
= object::SymbolicFile::createSymbolicFile(
382 Buf
, file_magic::bitcode
, &Context
);
384 // FIXME: check only for "not an object file" errors.
385 consumeError(ObjOrErr
.takeError());
388 Obj
= std::move(*ObjOrErr
);
390 auto ObjOrErr
= object::SymbolicFile::createSymbolicFile(Buf
);
392 // FIXME: check only for "not an object file" errors.
393 consumeError(ObjOrErr
.takeError());
396 Obj
= std::move(*ObjOrErr
);
400 for (const object::BasicSymbolRef
&S
: Obj
->symbols()) {
401 if (!isArchiveSymbol(S
))
403 Ret
.push_back(SymNames
.tell());
404 if (auto EC
= S
.printName(SymNames
))
405 return errorCodeToError(EC
);
411 static Expected
<std::vector
<MemberData
>>
412 computeMemberData(raw_ostream
&StringTable
, raw_ostream
&SymNames
,
413 object::Archive::Kind Kind
, bool Thin
, StringRef ArcName
,
414 ArrayRef
<NewArchiveMember
> NewMembers
) {
415 static char PaddingData
[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
417 // This ignores the symbol table, but we only need the value mod 8 and the
418 // symbol table is aligned to be a multiple of 8 bytes
421 std::vector
<MemberData
> Ret
;
422 bool HasObject
= false;
423 for (const NewArchiveMember
&M
: NewMembers
) {
425 raw_string_ostream
Out(Header
);
427 MemoryBufferRef Buf
= M
.Buf
->getMemBufferRef();
428 StringRef Data
= Thin
? "" : Buf
.getBuffer();
430 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
431 // least 4-byte aligned for 32-bit content. Opt for the larger encoding
432 // uniformly. This matches the behaviour with cctools and ensures that ld64
433 // is happy with archives that we generate.
434 unsigned MemberPadding
= Kind
== object::Archive::K_DARWIN
435 ? OffsetToAlignment(Data
.size(), 8)
437 unsigned TailPadding
= OffsetToAlignment(Data
.size() + MemberPadding
, 2);
438 StringRef Padding
= StringRef(PaddingData
, MemberPadding
+ TailPadding
);
440 printMemberHeader(Out
, Pos
, StringTable
, Kind
, Thin
, ArcName
, M
,
441 Buf
.getBufferSize() + MemberPadding
);
444 Expected
<std::vector
<unsigned>> Symbols
=
445 getSymbols(Buf
, SymNames
, HasObject
);
446 if (auto E
= Symbols
.takeError())
449 Pos
+= Header
.size() + Data
.size() + Padding
.size();
450 Ret
.push_back({std::move(*Symbols
), std::move(Header
), Data
, Padding
});
452 // If there are no symbols, emit an empty symbol table, to satisfy Solaris
453 // tools, older versions of which expect a symbol table in a non-empty
454 // archive, regardless of whether there are any symbols in it.
455 if (HasObject
&& SymNames
.tell() == 0)
456 SymNames
<< '\0' << '\0' << '\0';
460 Error
llvm::writeArchive(StringRef ArcName
,
461 ArrayRef
<NewArchiveMember
> NewMembers
,
462 bool WriteSymtab
, object::Archive::Kind Kind
,
463 bool Deterministic
, bool Thin
,
464 std::unique_ptr
<MemoryBuffer
> OldArchiveBuf
) {
465 assert((!Thin
|| !isBSDLike(Kind
)) && "Only the gnu format has a thin mode");
467 SmallString
<0> SymNamesBuf
;
468 raw_svector_ostream
SymNames(SymNamesBuf
);
469 SmallString
<0> StringTableBuf
;
470 raw_svector_ostream
StringTable(StringTableBuf
);
472 Expected
<std::vector
<MemberData
>> DataOrErr
=
473 computeMemberData(StringTable
, SymNames
, Kind
, Thin
, ArcName
, NewMembers
);
474 if (Error E
= DataOrErr
.takeError())
476 std::vector
<MemberData
> &Data
= *DataOrErr
;
478 if (!StringTableBuf
.empty())
479 Data
.insert(Data
.begin(), computeStringTable(StringTableBuf
));
481 // We would like to detect if we need to switch to a 64-bit symbol table.
483 uint64_t MaxOffset
= 0;
484 uint64_t LastOffset
= MaxOffset
;
485 for (const auto &M
: Data
) {
486 // Record the start of the member's offset
487 LastOffset
= MaxOffset
;
488 // Account for the size of each part associated with the member.
489 MaxOffset
+= M
.Header
.size() + M
.Data
.size() + M
.Padding
.size();
490 // We assume 32-bit symbols to see if 32-bit symbols are possible or not.
491 MaxOffset
+= M
.Symbols
.size() * 4;
494 // The SYM64 format is used when an archive's member offsets are larger than
495 // 32-bits can hold. The need for this shift in format is detected by
496 // writeArchive. To test this we need to generate a file with a member that
497 // has an offset larger than 32-bits but this demands a very slow test. To
498 // speed the test up we use this environment variable to pretend like the
499 // cutoff happens before 32-bits and instead happens at some much smaller
501 const char *Sym64Env
= std::getenv("SYM64_THRESHOLD");
502 int Sym64Threshold
= 32;
504 StringRef(Sym64Env
).getAsInteger(10, Sym64Threshold
);
506 // If LastOffset isn't going to fit in a 32-bit varible we need to switch
507 // to 64-bit. Note that the file can be larger than 4GB as long as the last
508 // member starts before the 4GB offset.
509 if (LastOffset
>= (1ULL << Sym64Threshold
))
510 Kind
= object::Archive::K_GNU64
;
513 Expected
<sys::fs::TempFile
> Temp
=
514 sys::fs::TempFile::create(ArcName
+ ".temp-archive-%%%%%%%.a");
516 return Temp
.takeError();
518 raw_fd_ostream
Out(Temp
->FD
, false);
525 writeSymbolTable(Out
, Kind
, Deterministic
, Data
, SymNamesBuf
);
527 for (const MemberData
&M
: Data
)
528 Out
<< M
.Header
<< M
.Data
<< M
.Padding
;
532 // At this point, we no longer need whatever backing memory
533 // was used to generate the NewMembers. On Windows, this buffer
534 // could be a mapped view of the file we want to replace (if
535 // we're updating an existing archive, say). In that case, the
536 // rename would still succeed, but it would leave behind a
537 // temporary file (actually the original file renamed) because
538 // a file cannot be deleted while there's a handle open on it,
539 // only renamed. So by freeing this buffer, this ensures that
540 // the last open handle on the destination file, if any, is
541 // closed before we attempt to rename.
542 OldArchiveBuf
.reset();
544 return Temp
->keep(ArcName
);