1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MachOObjcopy.h"
10 #include "../llvm-objcopy.h"
11 #include "CommonConfig.h"
12 #include "MachO/MachOConfig.h"
13 #include "MachOReader.h"
14 #include "MachOWriter.h"
15 #include "MultiFormatConfig.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/Object/ArchiveWriter.h"
18 #include "llvm/Object/MachOUniversal.h"
19 #include "llvm/Object/MachOUniversalWriter.h"
20 #include "llvm/Support/Errc.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/FileOutputBuffer.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/SmallVectorMemoryBuffer.h"
27 using namespace llvm::objcopy
;
28 using namespace llvm::objcopy::macho
;
29 using namespace llvm::object
;
// Predicate over a section. removeSections builds one of these and hands it
// to Object::removeSections.
31 using SectionPred
= std::function
<bool(const std::unique_ptr
<Section
> &Sec
)>;
// Predicate over a load command. processLoadCommands builds one of these and
// hands it to Object::removeLoadCommands.
32 using LoadCommandPred
= std::function
<bool(const LoadCommand
&LC
)>;
35 static bool isLoadCommandWithPayloadString(const LoadCommand
&LC
) {
36 // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
38 return LC
.MachOLoadCommand
.load_command_data
.cmd
== MachO::LC_RPATH
||
39 LC
.MachOLoadCommand
.load_command_data
.cmd
== MachO::LC_ID_DYLIB
||
40 LC
.MachOLoadCommand
.load_command_data
.cmd
== MachO::LC_LOAD_DYLIB
||
41 LC
.MachOLoadCommand
.load_command_data
.cmd
== MachO::LC_LOAD_WEAK_DYLIB
;
// Returns a StringRef that starts at the first payload byte of LC. Asserts
// that LC is a command accepted by isLoadCommandWithPayloadString.
// NOTE(review): the remainder of the return expression (the length argument
// and anything applied to the resulting StringRef) is not visible in this
// excerpt. Callers compare the result against user-supplied paths, so confirm
// how the zero padding written by updateLoadCommandPayloadString is handled.
45 static StringRef
getPayloadString(const LoadCommand
&LC
) {
46 assert(isLoadCommandWithPayloadString(LC
) &&
47 "unsupported load command encountered");
49 return StringRef(reinterpret_cast<const char *>(LC
.Payload
.data()),
54 static Error
removeSections(const CommonConfig
&Config
, Object
&Obj
) {
55 SectionPred RemovePred
= [](const std::unique_ptr
<Section
> &) {
59 if (!Config
.ToRemove
.empty()) {
60 RemovePred
= [&Config
, RemovePred
](const std::unique_ptr
<Section
> &Sec
) {
61 return Config
.ToRemove
.matches(Sec
->CanonicalName
);
65 if (Config
.StripAll
|| Config
.StripDebug
) {
66 // Remove all debug sections.
67 RemovePred
= [RemovePred
](const std::unique_ptr
<Section
> &Sec
) {
68 if (Sec
->Segname
== "__DWARF")
71 return RemovePred(Sec
);
75 if (!Config
.OnlySection
.empty()) {
76 // Overwrite RemovePred because --only-section takes priority.
77 RemovePred
= [&Config
](const std::unique_ptr
<Section
> &Sec
) {
78 return !Config
.OnlySection
.matches(Sec
->CanonicalName
);
82 return Obj
.removeSections(RemovePred
);
85 static void markSymbols(const CommonConfig
&, Object
&Obj
) {
86 // Symbols referenced from the indirect symbol table must not be removed.
87 for (IndirectSymbolEntry
&ISE
: Obj
.IndirectSymTable
.Symbols
)
89 (*ISE
.Symbol
)->Referenced
= true;
// Renames and then prunes the symbols of Obj.
//
// First, every entry of Obj.SymTable whose name has an entry in
// Config.SymbolsToRename gets the mapped name. Then a predicate is built and
// passed to Obj.SymTable.removeSymbols. The predicate looks at, in order:
// MachOConfig.KeepUndefined together with isUndefinedSymbol(), the
// REFERENCED_DYNAMICALLY bit of n_desc, Config.DiscardMode == All together
// with the N_EXT bit of n_type, and MachOConfig.StripSwiftSymbols together
// with the MH_DYLDLINK header flag, a non-zero Obj.SwiftVersion and
// isSwiftSymbol().
// NOTE(review): the value returned for each of these conditions, and any
// additional conditions, are not visible in this excerpt.
// NOTE(review): RemovePred captures Config and MachOConfig by value, which
// copies both configuration objects. The lambda is only passed to
// removeSymbols within this call, so capturing by reference looks sufficient
// - confirm.
92 static void updateAndRemoveSymbols(const CommonConfig
&Config
,
93 const MachOConfig
&MachOConfig
,
95 for (SymbolEntry
&Sym
: Obj
.SymTable
) {
96 auto I
= Config
.SymbolsToRename
.find(Sym
.Name
);
97 if (I
!= Config
.SymbolsToRename
.end())
98 Sym
.Name
= std::string(I
->getValue());
101 auto RemovePred
= [Config
, MachOConfig
,
102 &Obj
](const std::unique_ptr
<SymbolEntry
> &N
) {
105 if (MachOConfig
.KeepUndefined
&& N
->isUndefinedSymbol())
107 if (N
->n_desc
& MachO::REFERENCED_DYNAMICALLY
)
111 if (Config
.DiscardMode
== DiscardType::All
&& !(N
->n_type
& MachO::N_EXT
))
113 // This behavior is consistent with cctools' strip.
114 if (MachOConfig
.StripSwiftSymbols
&&
115 (Obj
.Header
.Flags
& MachO::MH_DYLDLINK
) && Obj
.SwiftVersion
&&
116 *Obj
.SwiftVersion
&& N
->isSwiftSymbol())
121 Obj
.SymTable
.removeSymbols(RemovePred
);
124 template <typename LCType
>
125 static void updateLoadCommandPayloadString(LoadCommand
&LC
, StringRef S
) {
126 assert(isLoadCommandWithPayloadString(LC
) &&
127 "unsupported load command encountered");
129 uint32_t NewCmdsize
= alignTo(sizeof(LCType
) + S
.size() + 1, 8);
131 LC
.MachOLoadCommand
.load_command_data
.cmdsize
= NewCmdsize
;
132 LC
.Payload
.assign(NewCmdsize
- sizeof(LCType
), 0);
133 std::copy(S
.begin(), S
.end(), LC
.Payload
.begin());
136 static LoadCommand
buildRPathLoadCommand(StringRef Path
) {
138 MachO::rpath_command RPathLC
;
139 RPathLC
.cmd
= MachO::LC_RPATH
;
140 RPathLC
.path
= sizeof(MachO::rpath_command
);
141 RPathLC
.cmdsize
= alignTo(sizeof(MachO::rpath_command
) + Path
.size() + 1, 8);
142 LC
.MachOLoadCommand
.rpath_command_data
= RPathLC
;
143 LC
.Payload
.assign(RPathLC
.cmdsize
- sizeof(MachO::rpath_command
), 0);
144 std::copy(Path
.begin(), Path
.end(), LC
.Payload
.begin());
// Applies the load-command related options of MachOConfig to Obj.
//
// Steps, in the order they appear below:
//   1. Remove LC_RPATH commands through Obj.removeLoadCommands. The predicate
//      consults MachOConfig.RemoveAllRpaths and otherwise looks the command's
//      path up in a set seeded from MachOConfig.RPathsToRemove, erasing each
//      path it finds from that set.
//   2. Report an invalid_argument error for any requested path still left in
//      the set, i.e. one that no LC_RPATH command carried.
//   3. Collect the paths of the remaining LC_RPATH commands and validate
//      MachOConfig.RPathsToUpdate against them: the old path must exist and
//      the new path must not.
//   4. Rewrite payload strings: LC_ID_DYLIB from MachOConfig.SharedLibId,
//      LC_RPATH from RPathsToUpdate, LC_LOAD_DYLIB / LC_LOAD_WEAK_DYLIB from
//      InstallNamesToUpdate.
//   5. Append one LC_RPATH per entry of MachOConfig.RPathToAdd and insert one
//      at the front per entry of MachOConfig.RPathToPrepend, rejecting paths
//      already present; after prepending, load command indexes are
//      recalculated.
// Returns Error::success() when every step succeeds.
// NOTE(review): several lines of this function (the predicate's return
// values, the case terminators of the switch and parts of the error message
// strings) are not visible in this excerpt.
148 static Error
processLoadCommands(const MachOConfig
&MachOConfig
, Object
&Obj
) {
150 DenseSet
<StringRef
> RPathsToRemove(MachOConfig
.RPathsToRemove
.begin(),
151 MachOConfig
.RPathsToRemove
.end());
153 LoadCommandPred RemovePred
= [&RPathsToRemove
,
154 &MachOConfig
](const LoadCommand
&LC
) {
155 if (LC
.MachOLoadCommand
.load_command_data
.cmd
== MachO::LC_RPATH
) {
156 // When removing all RPaths we don't need to care
157 // about what it contains
158 if (MachOConfig
.RemoveAllRpaths
)
161 StringRef RPath
= getPayloadString(LC
);
162 if (RPathsToRemove
.count(RPath
)) {
163 RPathsToRemove
.erase(RPath
);
170 if (Error E
= Obj
.removeLoadCommands(RemovePred
))
173 // Emit an error if the Mach-O binary does not contain an rpath path name
174 // specified in -delete_rpath.
175 for (StringRef RPath
: MachOConfig
.RPathsToRemove
) {
176 if (RPathsToRemove
.count(RPath
))
177 return createStringError(errc::invalid_argument
,
178 "no LC_RPATH load command with path: %s",
179 RPath
.str().c_str());
182 DenseSet
<StringRef
> RPaths
;
184 // Get all existing RPaths.
185 for (LoadCommand
&LC
: Obj
.LoadCommands
) {
186 if (LC
.MachOLoadCommand
.load_command_data
.cmd
== MachO::LC_RPATH
)
187 RPaths
.insert(getPayloadString(LC
));
190 // Throw errors for invalid RPaths.
191 for (const auto &OldNew
: MachOConfig
.RPathsToUpdate
) {
192 StringRef Old
= OldNew
.getFirst();
193 StringRef New
= OldNew
.getSecond();
194 if (!RPaths
.contains(Old
))
195 return createStringError(errc::invalid_argument
,
196 "no LC_RPATH load command with path: " + Old
);
197 if (RPaths
.contains(New
))
198 return createStringError(errc::invalid_argument
,
200 "' would create a duplicate load command");
203 // Update load commands.
204 for (LoadCommand
&LC
: Obj
.LoadCommands
) {
205 switch (LC
.MachOLoadCommand
.load_command_data
.cmd
) {
206 case MachO::LC_ID_DYLIB
:
207 if (MachOConfig
.SharedLibId
)
208 updateLoadCommandPayloadString
<MachO::dylib_command
>(
209 LC
, *MachOConfig
.SharedLibId
);
212 case MachO::LC_RPATH
: {
213 StringRef RPath
= getPayloadString(LC
);
214 StringRef NewRPath
= MachOConfig
.RPathsToUpdate
.lookup(RPath
);
215 if (!NewRPath
.empty())
216 updateLoadCommandPayloadString
<MachO::rpath_command
>(LC
, NewRPath
);
220 // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
221 // here once llvm-objcopy supports them.
222 case MachO::LC_LOAD_DYLIB
:
223 case MachO::LC_LOAD_WEAK_DYLIB
:
224 StringRef InstallName
= getPayloadString(LC
);
225 StringRef NewInstallName
=
226 MachOConfig
.InstallNamesToUpdate
.lookup(InstallName
);
227 if (!NewInstallName
.empty())
228 updateLoadCommandPayloadString
<MachO::dylib_command
>(LC
,
235 for (StringRef RPath
: MachOConfig
.RPathToAdd
) {
236 if (RPaths
.contains(RPath
))
237 return createStringError(errc::invalid_argument
,
239 "' would create a duplicate load command");
240 RPaths
.insert(RPath
);
241 Obj
.LoadCommands
.push_back(buildRPathLoadCommand(RPath
));
244 for (StringRef RPath
: MachOConfig
.RPathToPrepend
) {
245 if (RPaths
.contains(RPath
))
246 return createStringError(errc::invalid_argument
,
248 "' would create a duplicate load command");
250 RPaths
.insert(RPath
);
251 Obj
.LoadCommands
.insert(Obj
.LoadCommands
.begin(),
252 buildRPathLoadCommand(RPath
));
255 // Unlike appending rpaths, the indexes of subsequent load commands must
256 // be recalculated after prepending one.
257 if (!MachOConfig
.RPathToPrepend
.empty())
258 Obj
.updateLoadCommandIndexes();
260 return Error::success();
263 static Error
dumpSectionToFile(StringRef SecName
, StringRef Filename
,
265 for (LoadCommand
&LC
: Obj
.LoadCommands
)
266 for (const std::unique_ptr
<Section
> &Sec
: LC
.Sections
) {
267 if (Sec
->CanonicalName
== SecName
) {
268 Expected
<std::unique_ptr
<FileOutputBuffer
>> BufferOrErr
=
269 FileOutputBuffer::create(Filename
, Sec
->Content
.size());
271 return BufferOrErr
.takeError();
272 std::unique_ptr
<FileOutputBuffer
> Buf
= std::move(*BufferOrErr
);
273 llvm::copy(Sec
->Content
, Buf
->getBufferStart());
275 if (Error E
= Buf
->commit())
277 return Error::success();
281 return createStringError(object_error::parse_failed
, "section '%s' not found",
282 SecName
.str().c_str());
285 static Error
addSection(StringRef SecName
, StringRef Filename
, Object
&Obj
) {
286 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BufOrErr
=
287 MemoryBuffer::getFile(Filename
);
289 return createFileError(Filename
, errorCodeToError(BufOrErr
.getError()));
290 std::unique_ptr
<MemoryBuffer
> Buf
= std::move(*BufOrErr
);
292 std::pair
<StringRef
, StringRef
> Pair
= SecName
.split(',');
293 StringRef TargetSegName
= Pair
.first
;
294 Section
Sec(TargetSegName
, Pair
.second
);
295 Sec
.Content
= Obj
.NewSectionsContents
.save(Buf
->getBuffer());
296 Sec
.Size
= Sec
.Content
.size();
298 // Add the a section into an existing segment.
299 for (LoadCommand
&LC
: Obj
.LoadCommands
) {
300 Optional
<StringRef
> SegName
= LC
.getSegmentName();
301 if (SegName
&& SegName
== TargetSegName
) {
302 uint64_t Addr
= *LC
.getSegmentVMAddr();
303 for (const std::unique_ptr
<Section
> &S
: LC
.Sections
)
304 Addr
= std::max(Addr
, S
->Addr
+ S
->Size
);
305 LC
.Sections
.push_back(std::make_unique
<Section
>(Sec
));
306 LC
.Sections
.back()->Addr
= Addr
;
307 return Error::success();
311 // There's no segment named TargetSegName. Create a new load command and
312 // Insert a new section into it.
313 LoadCommand
&NewSegment
=
314 Obj
.addSegment(TargetSegName
, alignTo(Sec
.Size
, 16384));
315 NewSegment
.Sections
.push_back(std::make_unique
<Section
>(Sec
));
316 NewSegment
.Sections
.back()->Addr
= *NewSegment
.getSegmentVMAddr();
317 return Error::success();
320 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
321 // ("<segment>,<section>") and lengths of both segment and section names are
323 static Error
isValidMachOCannonicalName(StringRef Name
) {
324 if (Name
.count(',') != 1)
325 return createStringError(errc::invalid_argument
,
326 "invalid section name '%s' (should be formatted "
327 "as '<segment name>,<section name>')",
330 std::pair
<StringRef
, StringRef
> Pair
= Name
.split(',');
331 if (Pair
.first
.size() > 16)
332 return createStringError(errc::invalid_argument
,
333 "too long segment name: '%s'",
334 Pair
.first
.str().c_str());
335 if (Pair
.second
.size() > 16)
336 return createStringError(errc::invalid_argument
,
337 "too long section name: '%s'",
338 Pair
.second
.str().c_str());
339 return Error::success();
// Applies the requested command-line operations to Obj, in this order:
//   1. dump each "<section>=<file>" entry of Config.DumpSection,
//   2. remove sections (removeSections),
//   3. mark symbols that are still needed (markSymbols),
//   4. rename and remove symbols (updateAndRemoveSymbols),
//   5. clear the relocations of the sections of every load command,
//   6. validate and add each "<name>=<file>" entry of Config.AddSection,
//   7. update the load commands (processLoadCommands).
// Returns Error::success() once all steps are done.
// NOTE(review): the statements executed when a step yields an Error, and any
// condition guarding step 5, are not visible in this excerpt - confirm
// against the full source.
342 static Error
handleArgs(const CommonConfig
&Config
,
343 const MachOConfig
&MachOConfig
, Object
&Obj
) {
344 // Dump sections before add/remove for compatibility with GNU objcopy.
345 for (StringRef Flag
: Config
.DumpSection
) {
346 StringRef SectionName
;
348 std::tie(SectionName
, FileName
) = Flag
.split('=');
349 if (Error E
= dumpSectionToFile(SectionName
, FileName
, Obj
))
353 if (Error E
= removeSections(Config
, Obj
))
356 // Mark symbols to determine which symbols are still needed.
358 markSymbols(Config
, Obj
);
360 updateAndRemoveSymbols(Config
, MachOConfig
, Obj
);
363 for (LoadCommand
&LC
: Obj
.LoadCommands
)
364 for (std::unique_ptr
<Section
> &Sec
: LC
.Sections
)
365 Sec
->Relocations
.clear();
367 for (const auto &Flag
: Config
.AddSection
) {
368 std::pair
<StringRef
, StringRef
> SecPair
= Flag
.split("=");
369 StringRef SecName
= SecPair
.first
;
370 StringRef File
= SecPair
.second
;
371 if (Error E
= isValidMachOCannonicalName(SecName
))
373 if (Error E
= addSection(SecName
, File
, Obj
))
377 if (Error E
= processLoadCommands(MachOConfig
, Obj
))
380 return Error::success();
// Runs objcopy on a single Mach-O object file.
//
// The input In is parsed into an Object with MachOReader; a failure is
// returned as a file error naming Config.InputFilename. Files whose header
// type is MH_PRELOAD are rejected with a not_supported error. handleArgs then
// applies the requested modifications, and the result is laid out and written
// by MachOWriter using In's bitness and endianness, the file-name component
// of Config.OutputFilename and a page size chosen by a switch on In.getArch()
// (arm, aarch64 and aarch64_32 share one case).
// NOTE(review): the final parameter of the signature, the PageSize
// declaration and the values assigned to it are not visible in this excerpt.
383 Error
objcopy::macho::executeObjcopyOnBinary(const CommonConfig
&Config
,
384 const MachOConfig
&MachOConfig
,
385 object::MachOObjectFile
&In
,
387 MachOReader
Reader(In
);
388 Expected
<std::unique_ptr
<Object
>> O
= Reader
.create();
390 return createFileError(Config
.InputFilename
, O
.takeError());
392 if (O
->get()->Header
.FileType
== MachO::HeaderFileType::MH_PRELOAD
)
393 return createStringError(std::errc::not_supported
,
394 "%s: MH_PRELOAD files are not supported",
395 Config
.InputFilename
.str().c_str());
397 if (Error E
= handleArgs(Config
, MachOConfig
, **O
))
398 return createFileError(Config
.InputFilename
, std::move(E
));
400 // Page size used for alignment of segment sizes in Mach-O executables and
401 // dynamic libraries.
403 switch (In
.getArch()) {
404 case Triple::ArchType::arm
:
405 case Triple::ArchType::aarch64
:
406 case Triple::ArchType::aarch64_32
:
413 MachOWriter
Writer(**O
, In
.is64Bit(), In
.isLittleEndian(),
414 sys::path::filename(Config
.OutputFilename
), PageSize
, Out
);
415 if (auto E
= Writer
.finalize())
417 return Writer
.write();
// Runs objcopy on every slice of the universal (fat) Mach-O binary In and
// writes the rebuilt universal binary with writeUniversalBinaryToStream.
//
// For each element of In.objects():
//   - The slice is first queried as an archive. For an archive, its members
//     are processed with createNewArchiveMembers, the archive is serialised
//     again with writeArchiveToBuffer (keeping its symbol-table presence,
//     kind and thin-ness), re-parsed with createBinary, and recorded as a
//     Slice with the original CPU type, CPU subtype, arch flag name and
//     alignment.
//   - Otherwise the slice is queried as a Mach-O object file. If that fails
//     as well, an invalid_argument error naming the slice and the input file
//     is returned.
//   - For an object file, the Mach-O configuration is fetched from Config,
//     executeObjcopyOnBinary writes the result into an in-memory stream, and
//     the buffer is re-parsed with createBinary and recorded as a Slice.
// Binaries owns the re-parsed binaries together with their backing buffers,
// since the Slice objects are constructed from references into them.
// Returns Error::success() once the universal binary has been written.
// NOTE(review): the final parameter of the signature, the conditions
// selecting between the branches above and some call arguments are not
// visible in this excerpt.
420 Error
objcopy::macho::executeObjcopyOnMachOUniversalBinary(
421 const MultiFormatConfig
&Config
, const MachOUniversalBinary
&In
,
423 SmallVector
<OwningBinary
<Binary
>, 2> Binaries
;
424 SmallVector
<Slice
, 2> Slices
;
425 for (const auto &O
: In
.objects()) {
426 Expected
<std::unique_ptr
<Archive
>> ArOrErr
= O
.getAsArchive();
428 Expected
<std::vector
<NewArchiveMember
>> NewArchiveMembersOrErr
=
429 createNewArchiveMembers(Config
, **ArOrErr
);
430 if (!NewArchiveMembersOrErr
)
431 return NewArchiveMembersOrErr
.takeError();
432 Expected
<std::unique_ptr
<MemoryBuffer
>> OutputBufferOrErr
=
433 writeArchiveToBuffer(*NewArchiveMembersOrErr
,
434 (*ArOrErr
)->hasSymbolTable(), (*ArOrErr
)->kind(),
435 Config
.getCommonConfig().DeterministicArchives
,
436 (*ArOrErr
)->isThin());
437 if (!OutputBufferOrErr
)
438 return OutputBufferOrErr
.takeError();
439 Expected
<std::unique_ptr
<Binary
>> BinaryOrErr
=
440 object::createBinary(**OutputBufferOrErr
);
442 return BinaryOrErr
.takeError();
443 Binaries
.emplace_back(std::move(*BinaryOrErr
),
444 std::move(*OutputBufferOrErr
));
445 Slices
.emplace_back(*cast
<Archive
>(Binaries
.back().getBinary()),
446 O
.getCPUType(), O
.getCPUSubType(),
447 O
.getArchFlagName(), O
.getAlign());
450 // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
451 // ObjectForArch return an Error in case of the type mismatch. We need to
452 // check each in turn to see what kind of slice this is, so ignore errors
453 // produced along the way.
454 consumeError(ArOrErr
.takeError());
456 Expected
<std::unique_ptr
<MachOObjectFile
>> ObjOrErr
= O
.getAsObjectFile();
458 consumeError(ObjOrErr
.takeError());
459 return createStringError(
460 std::errc::invalid_argument
,
461 "slice for '%s' of the universal Mach-O binary "
462 "'%s' is not a Mach-O object or an archive",
463 O
.getArchFlagName().c_str(),
464 Config
.getCommonConfig().InputFilename
.str().c_str());
466 std::string ArchFlagName
= O
.getArchFlagName();
468 SmallVector
<char, 0> Buffer
;
469 raw_svector_ostream
MemStream(Buffer
);
471 Expected
<const MachOConfig
&> MachO
= Config
.getMachOConfig();
473 return MachO
.takeError();
475 if (Error E
= executeObjcopyOnBinary(Config
.getCommonConfig(), *MachO
,
476 **ObjOrErr
, MemStream
))
479 auto MB
= std::make_unique
<SmallVectorMemoryBuffer
>(
480 std::move(Buffer
), ArchFlagName
, /*RequiresNullTerminator=*/false);
481 Expected
<std::unique_ptr
<Binary
>> BinaryOrErr
= object::createBinary(*MB
);
483 return BinaryOrErr
.takeError();
484 Binaries
.emplace_back(std::move(*BinaryOrErr
), std::move(MB
));
485 Slices
.emplace_back(*cast
<MachOObjectFile
>(Binaries
.back().getBinary()),
489 if (Error Err
= writeUniversalBinaryToStream(Slices
, Out
))
492 return Error::success();