1 //===- ObjC.cpp -----------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "ConcatOutputSection.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
14 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
18 #include "lld/Common/ErrorHandler.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/Bitcode/BitcodeReader.h"
22 #include "llvm/Support/TimeProfiler.h"
25 using namespace llvm::MachO
;
27 using namespace lld::macho
;
29 template <class LP
> static bool objectHasObjCSection(MemoryBufferRef mb
) {
30 using SectionHeader
= typename
LP::section
;
33 reinterpret_cast<const typename
LP::mach_header
*>(mb
.getBufferStart());
34 if (hdr
->magic
!= LP::magic
)
38 findCommand
<typename
LP::segment_command
>(hdr
, LP::segmentLCType
)) {
39 auto sectionHeaders
= ArrayRef
<SectionHeader
>{
40 reinterpret_cast<const SectionHeader
*>(c
+ 1), c
->nsects
};
41 for (const SectionHeader
&secHead
: sectionHeaders
) {
42 StringRef
sectname(secHead
.sectname
,
43 strnlen(secHead
.sectname
, sizeof(secHead
.sectname
)));
44 StringRef
segname(secHead
.segname
,
45 strnlen(secHead
.segname
, sizeof(secHead
.segname
)));
46 if ((segname
== segment_names::data
&&
47 sectname
== section_names::objcCatList
) ||
48 (segname
== segment_names::text
&&
49 sectname
.starts_with(section_names::swift
))) {
// Non-template entry point for the templated objectHasObjCSection<LP>.
// Picks the LP64 instantiation when target->wordSize is 8 and the ILP32
// instantiation otherwise, forwarding `mb` unchanged and returning whatever
// the selected instantiation returns.
57 static bool objectHasObjCSection(MemoryBufferRef mb
) {
58 if (target
->wordSize
== 8)
59 return ::objectHasObjCSection
<LP64
>(mb
);
61 return ::objectHasObjCSection
<ILP32
>(mb
);
// Classifies the buffer with identify_magic(mb.getBuffer()) and answers per
// file kind:
//   - file_magic::macho_object: result of objectHasObjCSection(mb).
//   - file_magic::bitcode: result of isBitcodeContainingObjCCategory(mb),
//     unwrapped through check().
// NOTE(review): the handling of any other file kind is not visible in this
// listing -- confirm against the full source.
64 bool macho::hasObjCSection(MemoryBufferRef mb
) {
65 switch (identify_magic(mb
.getBuffer())) {
66 case file_magic::macho_object
:
67 return objectHasObjCSection(mb
);
68 case file_magic::bitcode
:
69 return check(isBitcodeContainingObjCCategory(mb
));
77 #define FOR_EACH_CATEGORY_FIELD(DO) \
80 DO(Ptr, instanceMethods) \
81 DO(Ptr, classMethods) \
83 DO(Ptr, instanceProps) \
87 CREATE_LAYOUT_CLASS(Category
, FOR_EACH_CATEGORY_FIELD
);
89 #undef FOR_EACH_CATEGORY_FIELD
91 #define FOR_EACH_CLASS_FIELD(DO) \
94 DO(Ptr, methodCache) \
98 CREATE_LAYOUT_CLASS(Class
, FOR_EACH_CLASS_FIELD
);
100 #undef FOR_EACH_CLASS_FIELD
102 #define FOR_EACH_RO_CLASS_FIELD(DO) \
103 DO(uint32_t, flags) \
104 DO(uint32_t, instanceStart) \
105 DO(Ptr, instanceSize) \
106 DO(Ptr, ivarLayout) \
108 DO(Ptr, baseMethods) \
109 DO(Ptr, baseProtocols) \
111 DO(Ptr, weakIvarLayout) \
112 DO(Ptr, baseProperties)
114 CREATE_LAYOUT_CLASS(ROClass
, FOR_EACH_RO_CLASS_FIELD
);
116 #undef FOR_EACH_RO_CLASS_FIELD
118 #define FOR_EACH_LIST_HEADER(DO) \
119 DO(uint32_t, structSize) \
120 DO(uint32_t, structCount)
122 CREATE_LAYOUT_CLASS(ListHeader
, FOR_EACH_LIST_HEADER
);
124 #undef FOR_EACH_LIST_HEADER
126 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
128 CREATE_LAYOUT_CLASS(ProtocolListHeader
, FOR_EACH_PROTOCOL_LIST_HEADER
);
130 #undef FOR_EACH_PROTOCOL_LIST_HEADER
132 #define FOR_EACH_METHOD(DO) \
137 CREATE_LAYOUT_CLASS(Method
, FOR_EACH_METHOD
);
139 #undef FOR_EACH_METHOD
141 enum MethodContainerKind
{
146 struct MethodContainer
{
147 MethodContainerKind kind
;
148 const ConcatInputSection
*isec
;
157 DenseMap
<CachedHashStringRef
, MethodContainer
> instanceMethods
;
158 DenseMap
<CachedHashStringRef
, MethodContainer
> classMethods
;
163 class ObjcCategoryChecker
{
165 ObjcCategoryChecker();
166 void parseCategory(const ConcatInputSection
*catListIsec
);
169 void parseClass(const Defined
*classSym
);
170 void parseMethods(const ConcatInputSection
*methodsIsec
,
171 const Symbol
*methodContainer
,
172 const ConcatInputSection
*containerIsec
,
173 MethodContainerKind
, MethodKind
);
175 CategoryLayout catLayout
;
176 ClassLayout classLayout
;
177 ROClassLayout roClassLayout
;
178 ListHeaderLayout listHeaderLayout
;
179 MethodLayout methodLayout
;
181 DenseMap
<const Symbol
*, ObjcClass
> classMap
;
// Constructs the checker by initializing every layout helper (category,
// class, read-only class, list header, method) with target->wordSize.
// The constructor body itself is empty.
184 ObjcCategoryChecker::ObjcCategoryChecker()
185 : catLayout(target
->wordSize
), classLayout(target
->wordSize
),
186 roClassLayout(target
->wordSize
), listHeaderLayout(target
->wordSize
),
187 methodLayout(target
->wordSize
) {}
189 void ObjcCategoryChecker::parseMethods(const ConcatInputSection
*methodsIsec
,
190 const Symbol
*methodContainerSym
,
191 const ConcatInputSection
*containerIsec
,
192 MethodContainerKind mcKind
,
194 ObjcClass
&klass
= classMap
[methodContainerSym
];
195 for (const Reloc
&r
: methodsIsec
->relocs
) {
196 if ((r
.offset
- listHeaderLayout
.totalSize
) % methodLayout
.totalSize
!=
197 methodLayout
.nameOffset
)
200 CachedHashStringRef
methodName(r
.getReferentString());
201 // +load methods are special: all implementations are called by the runtime
202 // even if they are part of the same class. Thus there is no need to check
204 // NOTE: Instead of specifically checking for this method name, ld64 simply
205 // checks whether a class / category is present in __objc_nlclslist /
206 // __objc_nlcatlist respectively. This will be the case if the class /
207 // category has a +load method. It skips optimizing the categories if there
208 // are multiple +load methods. Since it does dupe checking as part of the
209 // optimization process, this avoids spurious dupe messages around +load,
210 // but it also means that legit dupe issues for other methods are ignored.
211 if (mKind
== MK_Static
&& methodName
.val() == "load")
215 mKind
== MK_Instance
? klass
.instanceMethods
: klass
.classMethods
;
217 .try_emplace(methodName
, MethodContainer
{mcKind
, containerIsec
})
221 // We have a duplicate; generate a warning message.
222 const auto &mc
= methodMap
.lookup(methodName
);
223 const Reloc
*nameReloc
= nullptr;
224 if (mc
.kind
== MCK_Category
) {
225 nameReloc
= mc
.isec
->getRelocAt(catLayout
.nameOffset
);
227 assert(mc
.kind
== MCK_Class
);
228 const auto *roIsec
= mc
.isec
->getRelocAt(classLayout
.roDataOffset
)
229 ->getReferentInputSection();
230 nameReloc
= roIsec
->getRelocAt(roClassLayout
.nameOffset
);
232 StringRef containerName
= nameReloc
->getReferentString();
233 StringRef methPrefix
= mKind
== MK_Instance
? "-" : "+";
235 // We should only ever encounter collisions when parsing category methods
236 // (since the Class struct is parsed before any of its categories).
237 assert(mcKind
== MCK_Category
);
238 StringRef newCatName
=
239 containerIsec
->getRelocAt(catLayout
.nameOffset
)->getReferentString();
241 auto formatObjAndSrcFileName
= [](const InputSection
*section
) {
242 lld::macho::InputFile
*inputFile
= section
->getFile();
243 std::string result
= toString(inputFile
);
245 auto objFile
= dyn_cast_or_null
<ObjFile
>(inputFile
);
246 if (objFile
&& objFile
->compileUnit
)
247 result
+= " (" + objFile
->sourceFile() + ")";
252 StringRef containerType
= mc
.kind
== MCK_Category
? "category" : "class";
253 warn("method '" + methPrefix
+ methodName
.val() +
254 "' has conflicting definitions:\n>>> defined in category " +
255 newCatName
+ " from " + formatObjAndSrcFileName(containerIsec
) +
256 "\n>>> defined in " + containerType
+ " " + containerName
+ " from " +
257 formatObjAndSrcFileName(mc
.isec
));
// Processes one category body section (`catIsec`):
//   1. Reads the relocation at catLayout.klassOffset and takes its referent
//      Symbol as the class the category extends.
//   2. If that symbol is a Defined that is not yet a key in classMap, takes
//      an extra step for it. NOTE(review): the guarded statement is not
//      visible in this listing; presumably it parses the class first --
//      confirm against the full source.
//   3. For the relocations at catLayout.classMethodsOffset and
//      catLayout.instanceMethodsOffset (each only if present), hands the
//      referenced method-list section to parseMethods with MCK_Category and
//      MK_Static / MK_Instance respectively.
// NOTE(review): whether a missing class relocation is handled before it is
// dereferenced is not visible in this listing.
261 void ObjcCategoryChecker::parseCategory(const ConcatInputSection
*catIsec
) {
262 auto *classReloc
= catIsec
->getRelocAt(catLayout
.klassOffset
);
266 auto *classSym
= classReloc
->referent
.get
<Symbol
*>();
267 if (auto *d
= dyn_cast
<Defined
>(classSym
))
268 if (!classMap
.count(d
))
// Class (static) methods contributed by this category.
271 if (const auto *r
= catIsec
->getRelocAt(catLayout
.classMethodsOffset
)) {
272 parseMethods(cast
<ConcatInputSection
>(r
->getReferentInputSection()),
273 classSym
, catIsec
, MCK_Category
, MK_Static
);
// Instance methods contributed by this category.
276 if (const auto *r
= catIsec
->getRelocAt(catLayout
.instanceMethodsOffset
)) {
277 parseMethods(cast
<ConcatInputSection
>(r
->getReferentInputSection()),
278 classSym
, catIsec
, MCK_Category
, MK_Instance
);
282 void ObjcCategoryChecker::parseClass(const Defined
*classSym
) {
283 // Given a Class struct, get its corresponding Methods struct
284 auto getMethodsIsec
=
285 [&](const InputSection
*classIsec
) -> ConcatInputSection
* {
286 if (const auto *r
= classIsec
->getRelocAt(classLayout
.roDataOffset
)) {
287 if (const auto *roIsec
=
288 cast_or_null
<ConcatInputSection
>(r
->getReferentInputSection())) {
290 roIsec
->getRelocAt(roClassLayout
.baseMethodsOffset
)) {
291 if (auto *methodsIsec
= cast_or_null
<ConcatInputSection
>(
292 r
->getReferentInputSection()))
300 const auto *classIsec
= cast
<ConcatInputSection
>(classSym
->isec());
302 // Parse instance methods.
303 if (const auto *instanceMethodsIsec
= getMethodsIsec(classIsec
))
304 parseMethods(instanceMethodsIsec
, classSym
, classIsec
, MCK_Class
,
307 // Class methods are contained in the metaclass.
308 if (const auto *r
= classSym
->isec()->getRelocAt(classLayout
.metaClassOffset
))
309 if (const auto *classMethodsIsec
= getMethodsIsec(
310 cast
<ConcatInputSection
>(r
->getReferentInputSection())))
311 parseMethods(classMethodsIsec
, classSym
, classIsec
, MCK_Class
, MK_Static
);
// Entry point of the category check. Under a "ObjcCategoryChecker" time-trace
// scope, walks all `inputSections`; for every section whose name equals
// section_names::objcCatList, each relocation's referent input section is
// cast to ConcatInputSection and passed to
// ObjcCategoryChecker::parseCategory.
314 void objc::checkCategories() {
315 TimeTraceScope
timeScope("ObjcCategoryChecker");
// A single checker instance is shared across all category-list sections.
317 ObjcCategoryChecker checker
;
318 for (const InputSection
*isec
: inputSections
) {
319 if (isec
->getName() == section_names::objcCatList
)
320 for (const Reloc
&r
: isec
->relocs
) {
// cast<> (not dyn_cast<>): the referent is required to be a
// ConcatInputSection here.
321 auto *catIsec
= cast
<ConcatInputSection
>(r
.getReferentInputSection());
322 checker
.parseCategory(catIsec
);
329 class ObjcCategoryMerger
{
330 // In which language was a particular construct originally defined
331 enum SourceLanguage
{ Unknown
, ObjC
, Swift
};
333 // Information about an input category
334 struct InfoInputCategory
{
335 ConcatInputSection
*catListIsec
;
336 ConcatInputSection
*catBodyIsec
;
337 uint32_t offCatListIsec
= 0;
338 SourceLanguage sourceLanguage
= SourceLanguage::Unknown
;
340 bool wasMerged
= false;
343 // To write new (merged) categories or classes, we will try to make limited
344 // assumptions about the alignment and the sections the various class/category
345 // info are stored in. So we'll just reuse the same sections and
346 // alignment as already used in existing (input) categories. To do this we
347 // have InfoCategoryWriter which contains the various sections that the
348 // generated categories will be written to.
349 struct InfoWriteSection
{
350 bool valid
= false; // Data has been successfully collected from input
352 Section
*inputSection
;
354 OutputSection
*outputSection
;
357 struct InfoCategoryWriter
{
358 InfoWriteSection catListInfo
;
359 InfoWriteSection catBodyInfo
;
360 InfoWriteSection catNameInfo
;
361 InfoWriteSection catPtrListInfo
;
364 // Information about a pointer list in the original categories or class (method
365 // lists, protocol lists, etc.)
366 struct PointerListInfo
{
367 PointerListInfo() = default;
368 PointerListInfo(const PointerListInfo
&) = default;
369 PointerListInfo(const char *_categoryPrefix
, uint32_t _pointersPerStruct
)
370 : categoryPrefix(_categoryPrefix
),
371 pointersPerStruct(_pointersPerStruct
) {}
373 inline bool operator==(const PointerListInfo
&cmp
) const {
374 return pointersPerStruct
== cmp
.pointersPerStruct
&&
375 structSize
== cmp
.structSize
&& structCount
== cmp
.structCount
&&
376 allPtrs
== cmp
.allPtrs
;
379 const char *categoryPrefix
;
381 uint32_t pointersPerStruct
= 0;
383 uint32_t structSize
= 0;
384 uint32_t structCount
= 0;
386 std::vector
<Symbol
*> allPtrs
;
389 // Full information describing an ObjC class. This will include all the
390 // additional methods, protocols, and properties that are contained in the
391 // class and all the categories that extend a particular class.
392 struct ClassExtensionInfo
{
393 ClassExtensionInfo(CategoryLayout
&_catLayout
) : catLayout(_catLayout
){};
395 // Merged names of containers. Ex: base|firstCategory|secondCategory|...
396 std::string mergedContainerName
;
397 std::string baseClassName
;
398 const Symbol
*baseClass
= nullptr;
399 SourceLanguage baseClassSourceLanguage
= SourceLanguage::Unknown
;
401 CategoryLayout
&catLayout
;
403 // In case we generate new data, mark the new data as belonging to this file
404 ObjFile
*objFileForMergeData
= nullptr;
406 PointerListInfo instanceMethods
= {objc::symbol_names::instanceMethods
,
407 /*pointersPerStruct=*/3};
408 PointerListInfo classMethods
= {objc::symbol_names::categoryClassMethods
,
409 /*pointersPerStruct=*/3};
410 PointerListInfo protocols
= {objc::symbol_names::categoryProtocols
,
411 /*pointersPerStruct=*/0};
412 PointerListInfo instanceProps
= {objc::symbol_names::listProprieties
,
413 /*pointersPerStruct=*/2};
414 PointerListInfo classProps
= {objc::symbol_names::klassPropList
,
415 /*pointersPerStruct=*/2};
419 ObjcCategoryMerger(std::vector
<ConcatInputSection
*> &_allInputSections
);
421 static void doCleanup();
424 DenseSet
<const Symbol
*> collectNlCategories();
425 void collectAndValidateCategoriesData();
427 mergeCategoriesIntoSingleCategory(std::vector
<InfoInputCategory
> &categories
);
429 void eraseISec(ConcatInputSection
*isec
);
430 void eraseMergedCategories();
432 void generateCatListForNonErasedCategories(
433 MapVector
<ConcatInputSection
*, std::set
<uint64_t>>
434 catListToErasedOffsets
);
435 void collectSectionWriteInfoFromIsec(const InputSection
*isec
,
436 InfoWriteSection
&catWriteInfo
);
437 bool collectCategoryWriterInfoFromCategory(const InfoInputCategory
&catInfo
);
438 bool parseCatInfoToExtInfo(const InfoInputCategory
&catInfo
,
439 ClassExtensionInfo
&extInfo
);
441 void parseProtocolListInfo(const ConcatInputSection
*isec
, uint32_t secOffset
,
442 PointerListInfo
&ptrList
,
443 SourceLanguage sourceLang
);
445 PointerListInfo
parseProtocolListInfo(const ConcatInputSection
*isec
,
447 SourceLanguage sourceLang
);
449 bool parsePointerListInfo(const ConcatInputSection
*isec
, uint32_t secOffset
,
450 PointerListInfo
&ptrList
);
452 void emitAndLinkPointerList(Defined
*parentSym
, uint32_t linkAtOffset
,
453 const ClassExtensionInfo
&extInfo
,
454 const PointerListInfo
&ptrList
);
456 Defined
*emitAndLinkProtocolList(Defined
*parentSym
, uint32_t linkAtOffset
,
457 const ClassExtensionInfo
&extInfo
,
458 const PointerListInfo
&ptrList
);
460 Defined
*emitCategory(const ClassExtensionInfo
&extInfo
);
461 Defined
*emitCatListEntrySec(const std::string
&forCategoryName
,
462 const std::string
&forBaseClassName
,
464 Defined
*emitCategoryBody(const std::string
&name
, const Defined
*nameSym
,
465 const Symbol
*baseClassSym
,
466 const std::string
&baseClassName
, ObjFile
*objFile
);
467 Defined
*emitCategoryName(const std::string
&name
, ObjFile
*objFile
);
468 void createSymbolReference(Defined
*refFrom
, const Symbol
*refTo
,
469 uint32_t offset
, const Reloc
&relocTemplate
);
470 Defined
*tryFindDefinedOnIsec(const InputSection
*isec
, uint32_t offset
);
471 Symbol
*tryGetSymbolAtIsecOffset(const ConcatInputSection
*isec
,
473 Defined
*tryGetDefinedAtIsecOffset(const ConcatInputSection
*isec
,
475 Defined
*getClassRo(const Defined
*classSym
, bool getMetaRo
);
476 SourceLanguage
getClassSymSourceLang(const Defined
*classSym
);
477 bool mergeCategoriesIntoBaseClass(const Defined
*baseClass
,
478 std::vector
<InfoInputCategory
> &categories
);
479 void eraseSymbolAtIsecOffset(ConcatInputSection
*isec
, uint32_t offset
);
480 void tryEraseDefinedAtIsecOffset(const ConcatInputSection
*isec
,
483 // Allocate a null-terminated StringRef backed by generatedSectionData
484 StringRef
newStringData(const char *str
);
485 // Allocate section data, backed by generatedSectionData
486 SmallVector
<uint8_t> &newSectionData(uint32_t size
);
488 CategoryLayout catLayout
;
489 ClassLayout classLayout
;
490 ROClassLayout roClassLayout
;
491 ListHeaderLayout listHeaderLayout
;
492 MethodLayout methodLayout
;
493 ProtocolListHeaderLayout protocolListHeaderLayout
;
495 InfoCategoryWriter infoCategoryWriter
;
496 std::vector
<ConcatInputSection
*> &allInputSections
;
497 // Map of base class Symbol to list of InfoInputCategory's for it
498 MapVector
<const Symbol
*, std::vector
<InfoInputCategory
>> categoryMap
;
500 // Normally, the binary data comes from the input files, but since we're
501 // generating binary data ourselves, we use the below array to store it in.
502 // Need this to be 'static' so the data survives past the ObjcCategoryMerger
503 // object, as the data will be read by the Writer when the final binary is
505 static SmallVector
<std::unique_ptr
<SmallVector
<uint8_t>>>
506 generatedSectionData
;
509 SmallVector
<std::unique_ptr
<SmallVector
<uint8_t>>>
510 ObjcCategoryMerger::generatedSectionData
;
// Constructs the merger: every layout helper (category, class, read-only
// class, list header, method, protocol list header) is initialized with
// target->wordSize, and `allInputSections` is bound to the caller-provided
// vector. That member is declared as a reference, so the vector is not
// copied. The constructor body itself is empty.
512 ObjcCategoryMerger::ObjcCategoryMerger(
513 std::vector
<ConcatInputSection
*> &_allInputSections
)
514 : catLayout(target
->wordSize
), classLayout(target
->wordSize
),
515 roClassLayout(target
->wordSize
), listHeaderLayout(target
->wordSize
),
516 methodLayout(target
->wordSize
),
517 protocolListHeaderLayout(target
->wordSize
),
518 allInputSections(_allInputSections
) {}
// Fills `catWriteInfo` from an existing input section so that later-generated
// data can reuse the same placement:
//   - inputSection  <- address of isec->section (constness cast away)
//   - align         <- isec->align
//   - outputSection <- isec->parent (asserted non-null)
//   - relocTemplate <- isec->relocs[0], only when the section has relocations
//   - valid         <- true
// When `isec` has no relocations, relocTemplate is left as it was.
520 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
521 const InputSection
*isec
, InfoWriteSection
&catWriteInfo
) {
523 catWriteInfo
.inputSection
= const_cast<Section
*>(&isec
->section
);
524 catWriteInfo
.align
= isec
->align
;
525 catWriteInfo
.outputSection
= isec
->parent
;
527 assert(catWriteInfo
.outputSection
&&
528 "outputSection may not be null in collectSectionWriteInfoFromIsec.");
// Keep the first relocation of the source section as a template.
530 if (isec
->relocs
.size())
531 catWriteInfo
.relocTemplate
= isec
->relocs
[0];
533 catWriteInfo
.valid
= true;
537 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection
*isec
,
541 const Reloc
*reloc
= isec
->getRelocAt(offset
);
546 Symbol
*sym
= reloc
->referent
.dyn_cast
<Symbol
*>();
548 if (reloc
->addend
&& sym
) {
549 assert(isa
<Defined
>(sym
) && "Expected defined for non-zero addend");
550 Defined
*definedSym
= cast
<Defined
>(sym
);
551 sym
= tryFindDefinedOnIsec(definedSym
->isec(),
552 definedSym
->value
+ reloc
->addend
);
// Scans isec->symbols for a Defined whose half-open range
// [sym->value, sym->value + sym->size) contains `offset`.
// NOTE(review): the statements executed on a match and after the loop are not
// visible in this listing; presumably the matching symbol is returned and
// nullptr otherwise -- confirm against the full source.
558 Defined
*ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection
*isec
,
560 for (Defined
*sym
: isec
->symbols
)
561 if ((sym
->value
<= offset
) && (sym
->value
+ sym
->size
> offset
))
// Convenience wrapper around tryGetSymbolAtIsecOffset(isec, offset): returns
// the symbol found there as a Defined, or null when no symbol was found or
// the symbol is not a Defined (dyn_cast_or_null).
568 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection
*isec
,
570 Symbol
*sym
= tryGetSymbolAtIsecOffset(isec
, offset
);
571 return dyn_cast_or_null
<Defined
>(sym
);
574 // Get the class's ro_data symbol. If getMetaRo is true, then we will return
575 // the meta-class's ro_data symbol. Otherwise, we will return the class
576 // (instance) ro_data symbol.
//
// Both lookups go through tryGetDefinedAtIsecOffset, so the result may be
// null when no Defined is referenced at the probed offset.
// NOTE(review): the null checks and the getMetaRo branch itself are not
// visible in this listing; the comments below describe only the lookups that
// are shown.
577 Defined
*ObjcCategoryMerger::getClassRo(const Defined
*classSym
,
// The section that holds the class struct itself.
579 ConcatInputSection
*isec
= dyn_cast
<ConcatInputSection
>(classSym
->isec());
// Lookup in the class's own section, starting from classLayout.roDataOffset.
584 return tryGetDefinedAtIsecOffset(isec
, classLayout
.roDataOffset
+
// Meta-class lookup: first resolve the meta-class symbol referenced at
// metaClassOffset (relative to the class symbol's value within its section).
587 Defined
*metaClass
= tryGetDefinedAtIsecOffset(
588 isec
, classLayout
.metaClassOffset
+ classSym
->value
);
// Then read the ro_data reference out of the meta-class's section.
592 return tryGetDefinedAtIsecOffset(
593 dyn_cast
<ConcatInputSection
>(metaClass
->isec()),
594 classLayout
.roDataOffset
);
597 // Given a ConcatInputSection or CStringInputSection and an offset, if there is
598 // a symbol (Defined) at that offset, then erase the symbol (mark it not live)
599 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
600 const ConcatInputSection
*isec
, uint32_t offset
) {
601 const Reloc
*reloc
= isec
->getRelocAt(offset
);
606 Defined
*sym
= dyn_cast_or_null
<Defined
>(reloc
->referent
.get
<Symbol
*>());
610 if (auto *cisec
= dyn_cast_or_null
<ConcatInputSection
>(sym
->isec()))
612 else if (auto *csisec
= dyn_cast_or_null
<CStringInputSection
>(sym
->isec())) {
613 uint32_t totalOffset
= sym
->value
+ reloc
->addend
;
614 StringPiece
&piece
= csisec
->getStringPiece(totalOffset
);
617 llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
621 bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
622 const InfoInputCategory
&catInfo
) {
624 if (!infoCategoryWriter
.catListInfo
.valid
)
625 collectSectionWriteInfoFromIsec(catInfo
.catListIsec
,
626 infoCategoryWriter
.catListInfo
);
627 if (!infoCategoryWriter
.catBodyInfo
.valid
)
628 collectSectionWriteInfoFromIsec(catInfo
.catBodyIsec
,
629 infoCategoryWriter
.catBodyInfo
);
631 if (!infoCategoryWriter
.catNameInfo
.valid
) {
632 lld::macho::Defined
*catNameSym
=
633 tryGetDefinedAtIsecOffset(catInfo
.catBodyIsec
, catLayout
.nameOffset
);
636 // This is an unhandled case where the category name is not a symbol but
637 // instead points to a CStringInputSection (that doesn't have any symbol)
638 // TODO: Find a small repro and either fix or add a test case for this
643 collectSectionWriteInfoFromIsec(catNameSym
->isec(),
644 infoCategoryWriter
.catNameInfo
);
647 // Collect writer info from all the category lists (we're assuming they all
648 // would provide the same info)
649 if (!infoCategoryWriter
.catPtrListInfo
.valid
) {
650 for (uint32_t off
= catLayout
.instanceMethodsOffset
;
651 off
<= catLayout
.classPropsOffset
; off
+= target
->wordSize
) {
652 if (Defined
*ptrList
=
653 tryGetDefinedAtIsecOffset(catInfo
.catBodyIsec
, off
)) {
654 collectSectionWriteInfoFromIsec(ptrList
->isec(),
655 infoCategoryWriter
.catPtrListInfo
);
656 // we've successfully collected data, so we can break
665 // Parse a protocol list that might be linked to ConcatInputSection at a given
666 // offset. The format of the protocol list is different than other lists (prop
667 // lists, method lists) so we need to parse it differently
668 void ObjcCategoryMerger::parseProtocolListInfo(
669 const ConcatInputSection
*isec
, uint32_t secOffset
,
670 PointerListInfo
&ptrList
, [[maybe_unused
]] SourceLanguage sourceLang
) {
671 assert((isec
&& (secOffset
+ target
->wordSize
<= isec
->data
.size())) &&
672 "Tried to read pointer list beyond protocol section end");
674 const Reloc
*reloc
= isec
->getRelocAt(secOffset
);
678 auto *ptrListSym
= dyn_cast_or_null
<Defined
>(reloc
->referent
.get
<Symbol
*>());
679 assert(ptrListSym
&& "Protocol list reloc does not have a valid Defined");
681 // Theoretically protocol count can be either 32b or 64b, depending on
682 // platform pointer size, but to simplify implementation we always just read
683 // the lower 32b which should be good enough.
684 uint32_t protocolCount
= *reinterpret_cast<const uint32_t *>(
685 ptrListSym
->isec()->data
.data() + listHeaderLayout
.structSizeOffset
);
687 ptrList
.structCount
+= protocolCount
;
688 ptrList
.structSize
= target
->wordSize
;
690 [[maybe_unused
]] uint32_t expectedListSize
=
691 (protocolCount
* target
->wordSize
) +
692 /*header(count)*/ protocolListHeaderLayout
.totalSize
+
693 /*extra null value*/ target
->wordSize
;
695 // On Swift, the protocol list does not have the extra (unnecessary) null
696 [[maybe_unused
]] uint32_t expectedListSizeSwift
=
697 expectedListSize
- target
->wordSize
;
699 assert(((expectedListSize
== ptrListSym
->isec()->data
.size() &&
700 sourceLang
== SourceLanguage::ObjC
) ||
701 (expectedListSizeSwift
== ptrListSym
->isec()->data
.size() &&
702 sourceLang
== SourceLanguage::Swift
)) &&
703 "Protocol list does not match expected size");
705 uint32_t off
= protocolListHeaderLayout
.totalSize
;
706 for (uint32_t inx
= 0; inx
< protocolCount
; ++inx
) {
707 const Reloc
*reloc
= ptrListSym
->isec()->getRelocAt(off
);
708 assert(reloc
&& "No reloc found at protocol list offset");
710 auto *listSym
= dyn_cast_or_null
<Defined
>(reloc
->referent
.get
<Symbol
*>());
711 assert(listSym
&& "Protocol list reloc does not have a valid Defined");
713 ptrList
.allPtrs
.push_back(listSym
);
714 off
+= target
->wordSize
;
716 assert((ptrListSym
->isec()->getRelocAt(off
) == nullptr) &&
717 "expected null terminating protocol");
718 assert(off
+ /*extra null value*/ target
->wordSize
== expectedListSize
&&
719 "Protocol list end offset does not match expected size");
722 // Parse a protocol list and return the PointerListInfo for it
//
// Value-returning convenience overload: starts from a default-constructed
// PointerListInfo and lets the four-argument parseProtocolListInfo overload
// fill it in for (isec, secOffset, sourceLang).
// NOTE(review): the `secOffset` parameter declaration and the final return
// statement are not visible in this listing.
723 ObjcCategoryMerger::PointerListInfo
724 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection
*isec
,
726 SourceLanguage sourceLang
) {
727 PointerListInfo ptrList
;
728 parseProtocolListInfo(isec
, secOffset
, ptrList
, sourceLang
);
732 // Parse a pointer list that might be linked to ConcatInputSection at a given
733 // offset. This can be used for instance methods, class methods, instance props
734 // and class props since they have the same format.
735 bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection
*isec
,
737 PointerListInfo
&ptrList
) {
738 assert(ptrList
.pointersPerStruct
== 2 || ptrList
.pointersPerStruct
== 3);
739 assert(isec
&& "Trying to parse pointer list from null isec");
740 assert(secOffset
+ target
->wordSize
<= isec
->data
.size() &&
741 "Trying to read pointer list beyond section end");
743 const Reloc
*reloc
= isec
->getRelocAt(secOffset
);
744 // Empty list is a valid case, return true.
748 auto *ptrListSym
= dyn_cast_or_null
<Defined
>(reloc
->referent
.get
<Symbol
*>());
749 assert(ptrListSym
&& "Reloc does not have a valid Defined");
751 uint32_t thisStructSize
= *reinterpret_cast<const uint32_t *>(
752 ptrListSym
->isec()->data
.data() + listHeaderLayout
.structSizeOffset
);
753 uint32_t thisStructCount
= *reinterpret_cast<const uint32_t *>(
754 ptrListSym
->isec()->data
.data() + listHeaderLayout
.structCountOffset
);
755 assert(thisStructSize
== ptrList
.pointersPerStruct
* target
->wordSize
);
757 assert(!ptrList
.structSize
|| (thisStructSize
== ptrList
.structSize
));
759 ptrList
.structCount
+= thisStructCount
;
760 ptrList
.structSize
= thisStructSize
;
762 uint32_t expectedListSize
=
763 listHeaderLayout
.totalSize
+ (thisStructSize
* thisStructCount
);
764 assert(expectedListSize
== ptrListSym
->isec()->data
.size() &&
765 "Pointer list does not match expected size");
767 for (uint32_t off
= listHeaderLayout
.totalSize
; off
< expectedListSize
;
768 off
+= target
->wordSize
) {
769 const Reloc
*reloc
= ptrListSym
->isec()->getRelocAt(off
);
770 assert(reloc
&& "No reloc found at pointer list offset");
773 dyn_cast_or_null
<Defined
>(reloc
->referent
.dyn_cast
<Symbol
*>());
774 // Sometimes, the reloc points to a StringPiece (InputSection + addend)
775 // instead of a symbol.
776 // TODO: Skip these cases for now, but we should fix this.
780 ptrList
.allPtrs
.push_back(listSym
);
786 // Here we parse all the information of an input category (catInfo) and
787 // append the parsed info into the structure which will contain all the
788 // information about how a class is extended (extInfo)
789 bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory
&catInfo
,
790 ClassExtensionInfo
&extInfo
) {
791 const Reloc
*catNameReloc
=
792 catInfo
.catBodyIsec
->getRelocAt(catLayout
.nameOffset
);
795 assert(catNameReloc
&& "Category does not have a reloc at 'nameOffset'");
797 // is this the first category we are parsing?
798 if (extInfo
.mergedContainerName
.empty())
799 extInfo
.objFileForMergeData
=
800 dyn_cast_or_null
<ObjFile
>(catInfo
.catBodyIsec
->getFile());
802 extInfo
.mergedContainerName
+= "|";
804 assert(extInfo
.objFileForMergeData
&&
805 "Expected to already have valid objextInfo.objFileForMergeData");
807 StringRef catName
= catNameReloc
->getReferentString();
808 extInfo
.mergedContainerName
+= catName
.str();
811 if (!extInfo
.baseClass
) {
813 tryGetSymbolAtIsecOffset(catInfo
.catBodyIsec
, catLayout
.klassOffset
);
814 assert(extInfo
.baseClassName
.empty());
815 extInfo
.baseClass
= classSym
;
816 llvm::StringRef
classPrefix(objc::symbol_names::klass
);
817 assert(classSym
->getName().starts_with(classPrefix
) &&
818 "Base class symbol does not start with expected prefix");
819 extInfo
.baseClassName
= classSym
->getName().substr(classPrefix
.size());
821 assert((extInfo
.baseClass
==
822 tryGetSymbolAtIsecOffset(catInfo
.catBodyIsec
,
823 catLayout
.klassOffset
)) &&
824 "Trying to parse category info into container with different base "
828 if (!parsePointerListInfo(catInfo
.catBodyIsec
,
829 catLayout
.instanceMethodsOffset
,
830 extInfo
.instanceMethods
))
833 if (!parsePointerListInfo(catInfo
.catBodyIsec
, catLayout
.classMethodsOffset
,
834 extInfo
.classMethods
))
837 parseProtocolListInfo(catInfo
.catBodyIsec
, catLayout
.protocolsOffset
,
838 extInfo
.protocols
, catInfo
.sourceLanguage
);
840 if (!parsePointerListInfo(catInfo
.catBodyIsec
, catLayout
.instancePropsOffset
,
841 extInfo
.instanceProps
))
844 if (!parsePointerListInfo(catInfo
.catBodyIsec
, catLayout
.classPropsOffset
,
851 // Generate a protocol list (including header) and link it into the parent at
852 // the specified offset.
853 Defined
*ObjcCategoryMerger::emitAndLinkProtocolList(
854 Defined
*parentSym
, uint32_t linkAtOffset
,
855 const ClassExtensionInfo
&extInfo
, const PointerListInfo
&ptrList
) {
856 if (ptrList
.allPtrs
.empty())
859 assert(ptrList
.allPtrs
.size() == ptrList
.structCount
);
861 uint32_t bodySize
= (ptrList
.structCount
* target
->wordSize
) +
862 /*header(count)*/ protocolListHeaderLayout
.totalSize
+
863 /*extra null value*/ target
->wordSize
;
864 llvm::ArrayRef
<uint8_t> bodyData
= newSectionData(bodySize
);
866 // This theoretically can be either 32b or 64b, but writing just the first 32b
868 const uint32_t *ptrProtoCount
= reinterpret_cast<const uint32_t *>(
869 bodyData
.data() + protocolListHeaderLayout
.protocolCountOffset
);
871 *const_cast<uint32_t *>(ptrProtoCount
) = ptrList
.allPtrs
.size();
873 ConcatInputSection
*listSec
= make
<ConcatInputSection
>(
874 *infoCategoryWriter
.catPtrListInfo
.inputSection
, bodyData
,
875 infoCategoryWriter
.catPtrListInfo
.align
);
876 listSec
->parent
= infoCategoryWriter
.catPtrListInfo
.outputSection
;
877 listSec
->live
= true;
879 listSec
->parent
= infoCategoryWriter
.catPtrListInfo
.outputSection
;
881 std::string symName
= ptrList
.categoryPrefix
;
882 symName
+= extInfo
.baseClassName
+ "(" + extInfo
.mergedContainerName
+ ")";
884 Defined
*ptrListSym
= make
<Defined
>(
885 newStringData(symName
.c_str()), /*file=*/parentSym
->getObjectFile(),
886 listSec
, /*value=*/0, bodyData
.size(), /*isWeakDef=*/false,
887 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
888 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
889 /*isWeakDefCanBeHidden=*/false);
891 ptrListSym
->used
= true;
892 parentSym
->getObjectFile()->symbols
.push_back(ptrListSym
);
893 addInputSection(listSec
);
895 createSymbolReference(parentSym
, ptrListSym
, linkAtOffset
,
896 infoCategoryWriter
.catBodyInfo
.relocTemplate
);
898 uint32_t offset
= protocolListHeaderLayout
.totalSize
;
899 for (Symbol
*symbol
: ptrList
.allPtrs
) {
900 createSymbolReference(ptrListSym
, symbol
, offset
,
901 infoCategoryWriter
.catPtrListInfo
.relocTemplate
);
902 offset
+= target
->wordSize
;
908 // Generate a pointer list (including header) and link it into the parent at the
909 // specified offset. This is used for instance and class methods and
911 void ObjcCategoryMerger::emitAndLinkPointerList(
912 Defined
*parentSym
, uint32_t linkAtOffset
,
913 const ClassExtensionInfo
&extInfo
, const PointerListInfo
&ptrList
) {
914 if (ptrList
.allPtrs
.empty())
917 assert(ptrList
.allPtrs
.size() * target
->wordSize
==
918 ptrList
.structCount
* ptrList
.structSize
);
922 listHeaderLayout
.totalSize
+ (ptrList
.structSize
* ptrList
.structCount
);
923 llvm::ArrayRef
<uint8_t> bodyData
= newSectionData(bodySize
);
925 const uint32_t *ptrStructSize
= reinterpret_cast<const uint32_t *>(
926 bodyData
.data() + listHeaderLayout
.structSizeOffset
);
927 const uint32_t *ptrStructCount
= reinterpret_cast<const uint32_t *>(
928 bodyData
.data() + listHeaderLayout
.structCountOffset
);
930 *const_cast<uint32_t *>(ptrStructSize
) = ptrList
.structSize
;
931 *const_cast<uint32_t *>(ptrStructCount
) = ptrList
.structCount
;
933 ConcatInputSection
*listSec
= make
<ConcatInputSection
>(
934 *infoCategoryWriter
.catPtrListInfo
.inputSection
, bodyData
,
935 infoCategoryWriter
.catPtrListInfo
.align
);
936 listSec
->parent
= infoCategoryWriter
.catPtrListInfo
.outputSection
;
937 listSec
->live
= true;
939 listSec
->parent
= infoCategoryWriter
.catPtrListInfo
.outputSection
;
941 std::string symName
= ptrList
.categoryPrefix
;
942 symName
+= extInfo
.baseClassName
+ "(" + extInfo
.mergedContainerName
+ ")";
944 Defined
*ptrListSym
= make
<Defined
>(
945 newStringData(symName
.c_str()), /*file=*/parentSym
->getObjectFile(),
946 listSec
, /*value=*/0, bodyData
.size(), /*isWeakDef=*/false,
947 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
948 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
949 /*isWeakDefCanBeHidden=*/false);
951 ptrListSym
->used
= true;
952 parentSym
->getObjectFile()->symbols
.push_back(ptrListSym
);
953 addInputSection(listSec
);
955 createSymbolReference(parentSym
, ptrListSym
, linkAtOffset
,
956 infoCategoryWriter
.catBodyInfo
.relocTemplate
);
958 uint32_t offset
= listHeaderLayout
.totalSize
;
959 for (Symbol
*symbol
: ptrList
.allPtrs
) {
960 createSymbolReference(ptrListSym
, symbol
, offset
,
961 infoCategoryWriter
.catPtrListInfo
.relocTemplate
);
962 offset
+= target
->wordSize
;
966 // This method creates an __objc_catlist ConcatInputSection with a single slot
968 ObjcCategoryMerger::emitCatListEntrySec(const std::string
&forCategoryName
,
969 const std::string
&forBaseClassName
,
971 uint32_t sectionSize
= target
->wordSize
;
972 llvm::ArrayRef
<uint8_t> bodyData
= newSectionData(sectionSize
);
974 ConcatInputSection
*newCatList
=
975 make
<ConcatInputSection
>(*infoCategoryWriter
.catListInfo
.inputSection
,
976 bodyData
, infoCategoryWriter
.catListInfo
.align
);
977 newCatList
->parent
= infoCategoryWriter
.catListInfo
.outputSection
;
978 newCatList
->live
= true;
980 newCatList
->parent
= infoCategoryWriter
.catListInfo
.outputSection
;
982 std::string catSymName
= "<__objc_catlist slot for merged category ";
983 catSymName
+= forBaseClassName
+ "(" + forCategoryName
+ ")>";
985 Defined
*catListSym
= make
<Defined
>(
986 newStringData(catSymName
.c_str()), /*file=*/objFile
, newCatList
,
987 /*value=*/0, bodyData
.size(), /*isWeakDef=*/false, /*isExternal=*/false,
988 /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
989 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
990 /*isWeakDefCanBeHidden=*/false);
992 catListSym
->used
= true;
993 objFile
->symbols
.push_back(catListSym
);
994 addInputSection(newCatList
);
998 // Here we generate the main category body and link the name and base class into
999 // it. We don't link any other info yet like the protocol and class/instance
1001 Defined
*ObjcCategoryMerger::emitCategoryBody(const std::string
&name
,
1002 const Defined
*nameSym
,
1003 const Symbol
*baseClassSym
,
1004 const std::string
&baseClassName
,
1006 llvm::ArrayRef
<uint8_t> bodyData
= newSectionData(catLayout
.totalSize
);
1008 uint32_t *ptrSize
= (uint32_t *)(const_cast<uint8_t *>(bodyData
.data()) +
1009 catLayout
.sizeOffset
);
1010 *ptrSize
= catLayout
.totalSize
;
1012 ConcatInputSection
*newBodySec
=
1013 make
<ConcatInputSection
>(*infoCategoryWriter
.catBodyInfo
.inputSection
,
1014 bodyData
, infoCategoryWriter
.catBodyInfo
.align
);
1015 newBodySec
->parent
= infoCategoryWriter
.catBodyInfo
.outputSection
;
1016 newBodySec
->live
= true;
1018 std::string symName
=
1019 objc::symbol_names::category
+ baseClassName
+ "(" + name
+ ")";
1020 Defined
*catBodySym
= make
<Defined
>(
1021 newStringData(symName
.c_str()), /*file=*/objFile
, newBodySec
,
1022 /*value=*/0, bodyData
.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1023 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1024 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1025 /*isWeakDefCanBeHidden=*/false);
1027 catBodySym
->used
= true;
1028 objFile
->symbols
.push_back(catBodySym
);
1029 addInputSection(newBodySec
);
1031 createSymbolReference(catBodySym
, nameSym
, catLayout
.nameOffset
,
1032 infoCategoryWriter
.catBodyInfo
.relocTemplate
);
1034 // Create a reloc to the base class (either external or internal)
1035 createSymbolReference(catBodySym
, baseClassSym
, catLayout
.klassOffset
,
1036 infoCategoryWriter
.catBodyInfo
.relocTemplate
);
1041 // This writes the new category name (for the merged category) into the binary
1042 // and returns the sybmol for it.
1043 Defined
*ObjcCategoryMerger::emitCategoryName(const std::string
&name
,
1045 StringRef nameStrData
= newStringData(name
.c_str());
1046 // We use +1 below to include the null terminator
1047 llvm::ArrayRef
<uint8_t> nameData(
1048 reinterpret_cast<const uint8_t *>(nameStrData
.data()),
1049 nameStrData
.size() + 1);
1051 auto *parentSection
= infoCategoryWriter
.catNameInfo
.inputSection
;
1052 CStringInputSection
*newStringSec
= make
<CStringInputSection
>(
1053 *infoCategoryWriter
.catNameInfo
.inputSection
, nameData
,
1054 infoCategoryWriter
.catNameInfo
.align
, /*dedupLiterals=*/true);
1056 parentSection
->subsections
.push_back({0, newStringSec
});
1058 newStringSec
->splitIntoPieces();
1059 newStringSec
->pieces
[0].live
= true;
1060 newStringSec
->parent
= infoCategoryWriter
.catNameInfo
.outputSection
;
1061 in
.cStringSection
->addInput(newStringSec
);
1062 assert(newStringSec
->pieces
.size() == 1);
1064 Defined
*catNameSym
= make
<Defined
>(
1065 "<merged category name>", /*file=*/objFile
, newStringSec
,
1066 /*value=*/0, nameData
.size(),
1067 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1068 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1069 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1071 catNameSym
->used
= true;
1072 objFile
->symbols
.push_back(catNameSym
);
1076 // This method fully creates a new category from the given ClassExtensionInfo.
1077 // It creates the category name, body and method/protocol/prop lists and links
1078 // them all together. Then it creates a new __objc_catlist entry and adds the
1079 // category to it. Calling this method will fully generate a category which will
1080 // be available in the final binary.
1081 Defined
*ObjcCategoryMerger::emitCategory(const ClassExtensionInfo
&extInfo
) {
1082 Defined
*catNameSym
= emitCategoryName(extInfo
.mergedContainerName
,
1083 extInfo
.objFileForMergeData
);
1085 Defined
*catBodySym
= emitCategoryBody(
1086 extInfo
.mergedContainerName
, catNameSym
, extInfo
.baseClass
,
1087 extInfo
.baseClassName
, extInfo
.objFileForMergeData
);
1089 Defined
*catListSym
=
1090 emitCatListEntrySec(extInfo
.mergedContainerName
, extInfo
.baseClassName
,
1091 extInfo
.objFileForMergeData
);
1093 // Add the single category body to the category list at the offset 0.
1094 createSymbolReference(catListSym
, catBodySym
, /*offset=*/0,
1095 infoCategoryWriter
.catListInfo
.relocTemplate
);
1097 emitAndLinkPointerList(catBodySym
, catLayout
.instanceMethodsOffset
, extInfo
,
1098 extInfo
.instanceMethods
);
1100 emitAndLinkPointerList(catBodySym
, catLayout
.classMethodsOffset
, extInfo
,
1101 extInfo
.classMethods
);
1103 emitAndLinkProtocolList(catBodySym
, catLayout
.protocolsOffset
, extInfo
,
1106 emitAndLinkPointerList(catBodySym
, catLayout
.instancePropsOffset
, extInfo
,
1107 extInfo
.instanceProps
);
1109 emitAndLinkPointerList(catBodySym
, catLayout
.classPropsOffset
, extInfo
,
1110 extInfo
.classProps
);
1115 // This method merges all the categories (sharing a base class) into a single
1117 bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1118 std::vector
<InfoInputCategory
> &categories
) {
1119 assert(categories
.size() > 1 && "Expected at least 2 categories");
1121 ClassExtensionInfo
extInfo(catLayout
);
1123 for (auto &catInfo
: categories
)
1124 if (!parseCatInfoToExtInfo(catInfo
, extInfo
))
1127 Defined
*newCatDef
= emitCategory(extInfo
);
1128 assert(newCatDef
&& "Failed to create a new category");
1130 // Suppress unsuded var warning
1133 for (auto &catInfo
: categories
)
1134 catInfo
.wasMerged
= true;
1139 void ObjcCategoryMerger::createSymbolReference(Defined
*refFrom
,
1140 const Symbol
*refTo
,
1142 const Reloc
&relocTemplate
) {
1143 Reloc r
= relocTemplate
;
1146 r
.referent
= const_cast<Symbol
*>(refTo
);
1147 refFrom
->isec()->relocs
.push_back(r
);
1150 // Get the list of categories in the '__objc_nlcatlist' section. We can't
1151 // optimize these as they have a '+load' method that has to be called at
1153 DenseSet
<const Symbol
*> ObjcCategoryMerger::collectNlCategories() {
1154 DenseSet
<const Symbol
*> nlCategories
;
1156 for (InputSection
*sec
: allInputSections
) {
1157 if (sec
->getName() != section_names::objcNonLazyCatList
)
1160 for (auto &r
: sec
->relocs
) {
1161 const Symbol
*sym
= r
.referent
.dyn_cast
<Symbol
*>();
1162 nlCategories
.insert(sym
);
1165 return nlCategories
;
1168 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1169 auto nlCategories
= collectNlCategories();
1171 for (InputSection
*sec
: allInputSections
) {
1172 if (sec
->getName() != section_names::objcCatList
)
1174 ConcatInputSection
*catListCisec
= dyn_cast
<ConcatInputSection
>(sec
);
1175 assert(catListCisec
&&
1176 "__objc_catList InputSection is not a ConcatInputSection");
1178 for (uint32_t off
= 0; off
< catListCisec
->getSize();
1179 off
+= target
->wordSize
) {
1180 Defined
*categorySym
= tryGetDefinedAtIsecOffset(catListCisec
, off
);
1181 assert(categorySym
&&
1182 "Failed to get a valid category at __objc_catlit offset");
1184 if (nlCategories
.count(categorySym
))
1187 auto *catBodyIsec
= dyn_cast
<ConcatInputSection
>(categorySym
->isec());
1188 assert(catBodyIsec
&&
1189 "Category data section is not an ConcatInputSection");
1191 SourceLanguage eLang
= SourceLanguage::Unknown
;
1192 if (categorySym
->getName().starts_with(objc::symbol_names::category
))
1193 eLang
= SourceLanguage::ObjC
;
1194 else if (categorySym
->getName().starts_with(
1195 objc::symbol_names::swift_objc_category
))
1196 eLang
= SourceLanguage::Swift
;
1198 llvm_unreachable("Unexpected category symbol name");
1200 InfoInputCategory catInputInfo
{catListCisec
, catBodyIsec
, off
, eLang
};
1202 // Check that the category has a reloc at 'klassOffset' (which is
1203 // a pointer to the class symbol)
1206 tryGetSymbolAtIsecOffset(catBodyIsec
, catLayout
.klassOffset
);
1207 assert(classSym
&& "Category does not have a valid base class");
1209 if (!collectCategoryWriterInfoFromCategory(catInputInfo
))
1212 categoryMap
[classSym
].push_back(catInputInfo
);
1217 // In the input we have multiple __objc_catlist InputSection, each of which may
1218 // contain links to multiple categories. Of these categories, we will merge (and
1219 // erase) only some. There will be some categories that will remain untouched
1220 // (not erased). For these not erased categories, we generate new __objc_catlist
1221 // entries since the parent __objc_catlist entry will be erased
1222 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1223 const MapVector
<ConcatInputSection
*, std::set
<uint64_t>>
1224 catListToErasedOffsets
) {
1226 // Go through all offsets of all __objc_catlist's that we process and if there
1227 // are categories that we didn't process - generate a new __objc_catlist for
1229 for (auto &mapEntry
: catListToErasedOffsets
) {
1230 ConcatInputSection
*catListIsec
= mapEntry
.first
;
1231 for (uint32_t catListIsecOffset
= 0;
1232 catListIsecOffset
< catListIsec
->data
.size();
1233 catListIsecOffset
+= target
->wordSize
) {
1234 // This slot was erased, we can just skip it
1235 if (mapEntry
.second
.count(catListIsecOffset
))
1238 Defined
*nonErasedCatBody
=
1239 tryGetDefinedAtIsecOffset(catListIsec
, catListIsecOffset
);
1240 assert(nonErasedCatBody
&& "Failed to relocate non-deleted category");
1242 // Allocate data for the new __objc_catlist slot
1243 llvm::ArrayRef
<uint8_t> bodyData
= newSectionData(target
->wordSize
);
1245 // We mark the __objc_catlist slot as belonging to the same file as the
1247 ObjFile
*objFile
= dyn_cast
<ObjFile
>(nonErasedCatBody
->getFile());
1249 ConcatInputSection
*listSec
= make
<ConcatInputSection
>(
1250 *infoCategoryWriter
.catListInfo
.inputSection
, bodyData
,
1251 infoCategoryWriter
.catListInfo
.align
);
1252 listSec
->parent
= infoCategoryWriter
.catListInfo
.outputSection
;
1253 listSec
->live
= true;
1255 std::string slotSymName
= "<__objc_catlist slot for category ";
1256 slotSymName
+= nonErasedCatBody
->getName();
1259 Defined
*catListSlotSym
= make
<Defined
>(
1260 newStringData(slotSymName
.c_str()), /*file=*/objFile
, listSec
,
1261 /*value=*/0, bodyData
.size(),
1262 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1263 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1264 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1266 catListSlotSym
->used
= true;
1267 objFile
->symbols
.push_back(catListSlotSym
);
1268 addInputSection(listSec
);
1270 // Now link the category body into the newly created slot
1271 createSymbolReference(catListSlotSym
, nonErasedCatBody
, 0,
1272 infoCategoryWriter
.catListInfo
.relocTemplate
);
1277 void ObjcCategoryMerger::eraseISec(ConcatInputSection
*isec
) {
1279 for (auto &sym
: isec
->symbols
)
1283 // This fully erases the merged categories, including their body, their names,
1284 // their method/protocol/prop lists and the __objc_catlist entries that link to
1286 void ObjcCategoryMerger::eraseMergedCategories() {
1287 // Map of InputSection to a set of offsets of the categories that were merged
1288 MapVector
<ConcatInputSection
*, std::set
<uint64_t>> catListToErasedOffsets
;
1290 for (auto &mapEntry
: categoryMap
) {
1291 for (InfoInputCategory
&catInfo
: mapEntry
.second
) {
1292 if (catInfo
.wasMerged
) {
1293 eraseISec(catInfo
.catListIsec
);
1294 catListToErasedOffsets
[catInfo
.catListIsec
].insert(
1295 catInfo
.offCatListIsec
);
1300 // If there were categories that we did not erase, we need to generate a new
1301 // __objc_catList that contains only the un-merged categories, and get rid of
1302 // the references to the ones we merged.
1303 generateCatListForNonErasedCategories(catListToErasedOffsets
);
1305 // Erase the old method lists & names of the categories that were merged
1306 for (auto &mapEntry
: categoryMap
) {
1307 for (InfoInputCategory
&catInfo
: mapEntry
.second
) {
1308 if (!catInfo
.wasMerged
)
1311 eraseISec(catInfo
.catBodyIsec
);
1313 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1314 // categories because the name will sometimes also be used for other
1316 // For Swift, see usages of 'l_.str.11.SimpleClass' in
1317 // objc-category-merging-swift.s
1318 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1319 // objc-category-merging-erase-objc-name-test.s
1320 // TODO: handle the above in a smarter way
1322 tryEraseDefinedAtIsecOffset(catInfo
.catBodyIsec
,
1323 catLayout
.instanceMethodsOffset
);
1324 tryEraseDefinedAtIsecOffset(catInfo
.catBodyIsec
,
1325 catLayout
.classMethodsOffset
);
1326 tryEraseDefinedAtIsecOffset(catInfo
.catBodyIsec
,
1327 catLayout
.protocolsOffset
);
1328 tryEraseDefinedAtIsecOffset(catInfo
.catBodyIsec
,
1329 catLayout
.classPropsOffset
);
1330 tryEraseDefinedAtIsecOffset(catInfo
.catBodyIsec
,
1331 catLayout
.instancePropsOffset
);
1336 void ObjcCategoryMerger::doMerge() {
1337 collectAndValidateCategoriesData();
1339 for (auto &[baseClass
, catInfos
] : categoryMap
) {
1340 bool merged
= false;
1341 if (auto *baseClassDef
= dyn_cast
<Defined
>(baseClass
)) {
1342 // Merge all categories into the base class
1343 merged
= mergeCategoriesIntoBaseClass(baseClassDef
, catInfos
);
1344 } else if (catInfos
.size() > 1) {
1345 // Merge all categories into a new, single category
1346 merged
= mergeCategoriesIntoSingleCategory(catInfos
);
1349 warn("ObjC category merging skipped for class symbol' " +
1350 baseClass
->getName().str() + "'\n");
1353 // Erase all categories that were merged
1354 eraseMergedCategories();
1357 void ObjcCategoryMerger::doCleanup() { generatedSectionData
.clear(); }
1359 StringRef
ObjcCategoryMerger::newStringData(const char *str
) {
1360 uint32_t len
= strlen(str
);
1361 uint32_t bufSize
= len
+ 1;
1362 SmallVector
<uint8_t> &data
= newSectionData(bufSize
);
1363 char *strData
= reinterpret_cast<char *>(data
.data());
1364 // Copy the string chars and null-terminator
1365 memcpy(strData
, str
, bufSize
);
1366 return StringRef(strData
, len
);
1369 SmallVector
<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size
) {
1370 generatedSectionData
.push_back(
1371 std::make_unique
<SmallVector
<uint8_t>>(size
, 0));
1372 return *generatedSectionData
.back();
1377 void objc::mergeCategories() {
1378 TimeTraceScope
timeScope("ObjcCategoryMerger");
1380 ObjcCategoryMerger
merger(inputSections
);
1384 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1386 ObjcCategoryMerger::SourceLanguage
1387 ObjcCategoryMerger::getClassSymSourceLang(const Defined
*classSym
) {
1388 if (classSym
->getName().starts_with(objc::symbol_names::swift_objc_klass
))
1389 return SourceLanguage::Swift
;
1391 // If the symbol name matches the ObjC prefix, we don't necessarely know this
1392 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1394 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1395 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1396 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1398 // So we scan for symbols with the same address and check for the Swift class
1399 if (classSym
->getName().starts_with(objc::symbol_names::klass
)) {
1400 for (auto &sym
: classSym
->originalIsec
->symbols
)
1401 if (sym
->value
== classSym
->value
)
1402 if (sym
->getName().starts_with(objc::symbol_names::swift_objc_klass
))
1403 return SourceLanguage::Swift
;
1404 return SourceLanguage::ObjC
;
1407 llvm_unreachable("Unexpected class symbol name during category merging");
1410 bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1411 const Defined
*baseClass
, std::vector
<InfoInputCategory
> &categories
) {
1412 assert(categories
.size() >= 1 && "Expected at least one category to merge");
1414 // Collect all the info from the categories
1415 ClassExtensionInfo
extInfo(catLayout
);
1416 extInfo
.baseClass
= baseClass
;
1417 extInfo
.baseClassSourceLanguage
= getClassSymSourceLang(baseClass
);
1419 for (auto &catInfo
: categories
)
1420 if (!parseCatInfoToExtInfo(catInfo
, extInfo
))
1423 // Get metadata for the base class
1424 Defined
*metaRo
= getClassRo(baseClass
, /*getMetaRo=*/true);
1425 ConcatInputSection
*metaIsec
= dyn_cast
<ConcatInputSection
>(metaRo
->isec());
1426 Defined
*classRo
= getClassRo(baseClass
, /*getMetaRo=*/false);
1427 ConcatInputSection
*classIsec
= dyn_cast
<ConcatInputSection
>(classRo
->isec());
1429 // Now collect the info from the base class from the various lists in the
1432 // Protocol lists are a special case - the same protocol list is in classRo
1433 // and metaRo, so we only need to parse it once
1434 parseProtocolListInfo(classIsec
, roClassLayout
.baseProtocolsOffset
,
1435 extInfo
.protocols
, extInfo
.baseClassSourceLanguage
);
1437 // Check that the classRo and metaRo protocol lists are identical
1438 assert(parseProtocolListInfo(classIsec
, roClassLayout
.baseProtocolsOffset
,
1439 extInfo
.baseClassSourceLanguage
) ==
1440 parseProtocolListInfo(metaIsec
, roClassLayout
.baseProtocolsOffset
,
1441 extInfo
.baseClassSourceLanguage
) &&
1442 "Category merger expects classRo and metaRo to have the same protocol "
1445 parsePointerListInfo(metaIsec
, roClassLayout
.baseMethodsOffset
,
1446 extInfo
.classMethods
);
1447 parsePointerListInfo(classIsec
, roClassLayout
.baseMethodsOffset
,
1448 extInfo
.instanceMethods
);
1450 parsePointerListInfo(metaIsec
, roClassLayout
.basePropertiesOffset
,
1451 extInfo
.classProps
);
1452 parsePointerListInfo(classIsec
, roClassLayout
.basePropertiesOffset
,
1453 extInfo
.instanceProps
);
1455 // Erase the old lists - these will be generated and replaced
1456 eraseSymbolAtIsecOffset(metaIsec
, roClassLayout
.baseMethodsOffset
);
1457 eraseSymbolAtIsecOffset(metaIsec
, roClassLayout
.baseProtocolsOffset
);
1458 eraseSymbolAtIsecOffset(metaIsec
, roClassLayout
.basePropertiesOffset
);
1459 eraseSymbolAtIsecOffset(classIsec
, roClassLayout
.baseMethodsOffset
);
1460 eraseSymbolAtIsecOffset(classIsec
, roClassLayout
.baseProtocolsOffset
);
1461 eraseSymbolAtIsecOffset(classIsec
, roClassLayout
.basePropertiesOffset
);
1463 // Emit the newly merged lists - first into the meta RO then into the class RO
1464 // First we emit and link the protocol list into the meta RO. Then we link it
1465 // in the classRo as well (they're supposed to be identical)
1466 if (Defined
*protoListSym
=
1467 emitAndLinkProtocolList(metaRo
, roClassLayout
.baseProtocolsOffset
,
1468 extInfo
, extInfo
.protocols
)) {
1469 createSymbolReference(classRo
, protoListSym
,
1470 roClassLayout
.baseProtocolsOffset
,
1471 infoCategoryWriter
.catBodyInfo
.relocTemplate
);
1474 emitAndLinkPointerList(metaRo
, roClassLayout
.baseMethodsOffset
, extInfo
,
1475 extInfo
.classMethods
);
1476 emitAndLinkPointerList(classRo
, roClassLayout
.baseMethodsOffset
, extInfo
,
1477 extInfo
.instanceMethods
);
1479 emitAndLinkPointerList(metaRo
, roClassLayout
.basePropertiesOffset
, extInfo
,
1480 extInfo
.classProps
);
1482 emitAndLinkPointerList(classRo
, roClassLayout
.basePropertiesOffset
, extInfo
,
1483 extInfo
.instanceProps
);
1485 // Mark all the categories as merged - this will be used to erase them later
1486 for (auto &catInfo
: categories
)
1487 catInfo
.wasMerged
= true;
1492 // Erase the symbol at a given offset in an InputSection
1493 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection
*isec
,
1495 Defined
*sym
= tryGetDefinedAtIsecOffset(isec
, offset
);
1499 // Remove the symbol from isec->symbols
1500 assert(isa
<Defined
>(sym
) && "Can only erase a Defined");
1501 llvm::erase(isec
->symbols
, sym
);
1503 // Remove the relocs that refer to this symbol
1504 auto removeAtOff
= [offset
](Reloc
const &r
) { return r
.offset
== offset
; };
1505 llvm::erase_if(isec
->relocs
, removeAtOff
);
1507 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1508 // the whole ConcatInputSection
1509 if (ConcatInputSection
*cisec
= dyn_cast
<ConcatInputSection
>(sym
->isec()))
1510 if (cisec
->data
.size() == sym
->size
)