[flang] Treat pre-processed input as fixed (#117563)
[llvm-project.git] / lld / MachO / ObjC.cpp
blobff13e8eb4b5ce0defaa71e332f0b9c07378dc633
1 //===- ObjC.cpp -----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "ObjC.h"
10 #include "ConcatOutputSection.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "Layout.h"
14 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
16 #include "Target.h"
18 #include "lld/Common/ErrorHandler.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/Bitcode/BitcodeReader.h"
22 #include "llvm/Support/TimeProfiler.h"
24 using namespace llvm;
25 using namespace llvm::MachO;
26 using namespace lld;
27 using namespace lld::macho;
29 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
30 using SectionHeader = typename LP::section;
32 auto *hdr =
33 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
34 if (hdr->magic != LP::magic)
35 return false;
37 if (const auto *c =
38 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
39 auto sectionHeaders = ArrayRef<SectionHeader>{
40 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
41 for (const SectionHeader &secHead : sectionHeaders) {
42 StringRef sectname(secHead.sectname,
43 strnlen(secHead.sectname, sizeof(secHead.sectname)));
44 StringRef segname(secHead.segname,
45 strnlen(secHead.segname, sizeof(secHead.segname)));
46 if ((segname == segment_names::data &&
47 sectname == section_names::objcCatList) ||
48 (segname == segment_names::text &&
49 sectname.starts_with(section_names::swift))) {
50 return true;
54 return false;
57 static bool objectHasObjCSection(MemoryBufferRef mb) {
58 if (target->wordSize == 8)
59 return ::objectHasObjCSection<LP64>(mb);
60 else
61 return ::objectHasObjCSection<ILP32>(mb);
64 bool macho::hasObjCSection(MemoryBufferRef mb) {
65 switch (identify_magic(mb.getBuffer())) {
66 case file_magic::macho_object:
67 return objectHasObjCSection(mb);
68 case file_magic::bitcode:
69 return check(isBitcodeContainingObjCCategory(mb));
70 default:
71 return false;
75 namespace {
77 #define FOR_EACH_CATEGORY_FIELD(DO) \
78 DO(Ptr, name) \
79 DO(Ptr, klass) \
80 DO(Ptr, instanceMethods) \
81 DO(Ptr, classMethods) \
82 DO(Ptr, protocols) \
83 DO(Ptr, instanceProps) \
84 DO(Ptr, classProps) \
85 DO(uint32_t, size)
87 CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
89 #undef FOR_EACH_CATEGORY_FIELD
91 #define FOR_EACH_CLASS_FIELD(DO) \
92 DO(Ptr, metaClass) \
93 DO(Ptr, superClass) \
94 DO(Ptr, methodCache) \
95 DO(Ptr, vtable) \
96 DO(Ptr, roData)
98 CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
100 #undef FOR_EACH_CLASS_FIELD
102 #define FOR_EACH_RO_CLASS_FIELD(DO) \
103 DO(uint32_t, flags) \
104 DO(uint32_t, instanceStart) \
105 DO(Ptr, instanceSize) \
106 DO(Ptr, ivarLayout) \
107 DO(Ptr, name) \
108 DO(Ptr, baseMethods) \
109 DO(Ptr, baseProtocols) \
110 DO(Ptr, ivars) \
111 DO(Ptr, weakIvarLayout) \
112 DO(Ptr, baseProperties)
114 CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
116 #undef FOR_EACH_RO_CLASS_FIELD
118 #define FOR_EACH_LIST_HEADER(DO) \
119 DO(uint32_t, structSize) \
120 DO(uint32_t, structCount)
122 CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
124 #undef FOR_EACH_LIST_HEADER
126 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
128 CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
130 #undef FOR_EACH_PROTOCOL_LIST_HEADER
132 #define FOR_EACH_METHOD(DO) \
133 DO(Ptr, name) \
134 DO(Ptr, type) \
135 DO(Ptr, impl)
137 CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
139 #undef FOR_EACH_METHOD
141 enum MethodContainerKind {
142 MCK_Class,
143 MCK_Category,
146 struct MethodContainer {
147 MethodContainerKind kind;
148 const ConcatInputSection *isec;
151 enum MethodKind {
152 MK_Instance,
153 MK_Static,
156 struct ObjcClass {
157 DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
158 DenseMap<CachedHashStringRef, MethodContainer> classMethods;
161 } // namespace
163 class ObjcCategoryChecker {
164 public:
165 ObjcCategoryChecker();
166 void parseCategory(const ConcatInputSection *catListIsec);
168 private:
169 void parseClass(const Defined *classSym);
170 void parseMethods(const ConcatInputSection *methodsIsec,
171 const Symbol *methodContainer,
172 const ConcatInputSection *containerIsec,
173 MethodContainerKind, MethodKind);
175 CategoryLayout catLayout;
176 ClassLayout classLayout;
177 ROClassLayout roClassLayout;
178 ListHeaderLayout listHeaderLayout;
179 MethodLayout methodLayout;
181 DenseMap<const Symbol *, ObjcClass> classMap;
184 ObjcCategoryChecker::ObjcCategoryChecker()
185 : catLayout(target->wordSize), classLayout(target->wordSize),
186 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
187 methodLayout(target->wordSize) {}
189 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
190 const Symbol *methodContainerSym,
191 const ConcatInputSection *containerIsec,
192 MethodContainerKind mcKind,
193 MethodKind mKind) {
194 ObjcClass &klass = classMap[methodContainerSym];
195 for (const Reloc &r : methodsIsec->relocs) {
196 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
197 methodLayout.nameOffset)
198 continue;
200 CachedHashStringRef methodName(r.getReferentString());
201 // +load methods are special: all implementations are called by the runtime
202 // even if they are part of the same class. Thus there is no need to check
203 // for duplicates.
204 // NOTE: Instead of specifically checking for this method name, ld64 simply
205 // checks whether a class / category is present in __objc_nlclslist /
206 // __objc_nlcatlist respectively. This will be the case if the class /
207 // category has a +load method. It skips optimizing the categories if there
208 // are multiple +load methods. Since it does dupe checking as part of the
209 // optimization process, this avoids spurious dupe messages around +load,
210 // but it also means that legit dupe issues for other methods are ignored.
211 if (mKind == MK_Static && methodName.val() == "load")
212 continue;
214 auto &methodMap =
215 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
216 if (methodMap
217 .try_emplace(methodName, MethodContainer{mcKind, containerIsec})
218 .second)
219 continue;
221 // We have a duplicate; generate a warning message.
222 const auto &mc = methodMap.lookup(methodName);
223 const Reloc *nameReloc = nullptr;
224 if (mc.kind == MCK_Category) {
225 nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
226 } else {
227 assert(mc.kind == MCK_Class);
228 const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
229 ->getReferentInputSection();
230 nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
232 StringRef containerName = nameReloc->getReferentString();
233 StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
235 // We should only ever encounter collisions when parsing category methods
236 // (since the Class struct is parsed before any of its categories).
237 assert(mcKind == MCK_Category);
238 StringRef newCatName =
239 containerIsec->getRelocAt(catLayout.nameOffset)->getReferentString();
241 auto formatObjAndSrcFileName = [](const InputSection *section) {
242 lld::macho::InputFile *inputFile = section->getFile();
243 std::string result = toString(inputFile);
245 auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
246 if (objFile && objFile->compileUnit)
247 result += " (" + objFile->sourceFile() + ")";
249 return result;
252 StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
253 warn("method '" + methPrefix + methodName.val() +
254 "' has conflicting definitions:\n>>> defined in category " +
255 newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
256 "\n>>> defined in " + containerType + " " + containerName + " from " +
257 formatObjAndSrcFileName(mc.isec));
261 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
262 auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
263 if (!classReloc)
264 return;
266 auto *classSym = classReloc->referent.get<Symbol *>();
267 if (auto *d = dyn_cast<Defined>(classSym))
268 if (!classMap.count(d))
269 parseClass(d);
271 if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
272 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
273 classSym, catIsec, MCK_Category, MK_Static);
276 if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
277 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
278 classSym, catIsec, MCK_Category, MK_Instance);
282 void ObjcCategoryChecker::parseClass(const Defined *classSym) {
283 // Given a Class struct, get its corresponding Methods struct
284 auto getMethodsIsec =
285 [&](const InputSection *classIsec) -> ConcatInputSection * {
286 if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
287 if (const auto *roIsec =
288 cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
289 if (const auto *r =
290 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
291 if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
292 r->getReferentInputSection()))
293 return methodsIsec;
297 return nullptr;
300 const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
302 // Parse instance methods.
303 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
304 parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
305 MK_Instance);
307 // Class methods are contained in the metaclass.
308 if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
309 if (const auto *classMethodsIsec = getMethodsIsec(
310 cast<ConcatInputSection>(r->getReferentInputSection())))
311 parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
314 void objc::checkCategories() {
315 TimeTraceScope timeScope("ObjcCategoryChecker");
317 ObjcCategoryChecker checker;
318 for (const InputSection *isec : inputSections) {
319 if (isec->getName() == section_names::objcCatList)
320 for (const Reloc &r : isec->relocs) {
321 auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
322 checker.parseCategory(catIsec);
327 namespace {
329 class ObjcCategoryMerger {
330 // In which language was a particular construct originally defined
331 enum SourceLanguage { Unknown, ObjC, Swift };
333 // Information about an input category
334 struct InfoInputCategory {
335 ConcatInputSection *catListIsec;
336 ConcatInputSection *catBodyIsec;
337 uint32_t offCatListIsec = 0;
338 SourceLanguage sourceLanguage = SourceLanguage::Unknown;
340 bool wasMerged = false;
343 // To write new (merged) categories or classes, we will try make limited
344 // assumptions about the alignment and the sections the various class/category
345 // info are stored in and . So we'll just reuse the same sections and
346 // alignment as already used in existing (input) categories. To do this we
347 // have InfoCategoryWriter which contains the various sections that the
348 // generated categories will be written to.
349 struct InfoWriteSection {
350 bool valid = false; // Data has been successfully collected from input
351 uint32_t align = 0;
352 Section *inputSection;
353 Reloc relocTemplate;
354 OutputSection *outputSection;
357 struct InfoCategoryWriter {
358 InfoWriteSection catListInfo;
359 InfoWriteSection catBodyInfo;
360 InfoWriteSection catNameInfo;
361 InfoWriteSection catPtrListInfo;
364 // Information about a pointer list in the original categories or class(method
365 // lists, protocol lists, etc)
366 struct PointerListInfo {
367 PointerListInfo() = default;
368 PointerListInfo(const PointerListInfo &) = default;
369 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
370 : categoryPrefix(_categoryPrefix),
371 pointersPerStruct(_pointersPerStruct) {}
373 inline bool operator==(const PointerListInfo &cmp) const {
374 return pointersPerStruct == cmp.pointersPerStruct &&
375 structSize == cmp.structSize && structCount == cmp.structCount &&
376 allPtrs == cmp.allPtrs;
379 const char *categoryPrefix;
381 uint32_t pointersPerStruct = 0;
383 uint32_t structSize = 0;
384 uint32_t structCount = 0;
386 std::vector<Symbol *> allPtrs;
389 // Full information describing an ObjC class . This will include all the
390 // additional methods, protocols, and properties that are contained in the
391 // class and all the categories that extend a particular class.
392 struct ClassExtensionInfo {
393 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
395 // Merged names of containers. Ex: base|firstCategory|secondCategory|...
396 std::string mergedContainerName;
397 std::string baseClassName;
398 const Symbol *baseClass = nullptr;
399 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
401 CategoryLayout &catLayout;
403 // In case we generate new data, mark the new data as belonging to this file
404 ObjFile *objFileForMergeData = nullptr;
406 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
407 /*pointersPerStruct=*/3};
408 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
409 /*pointersPerStruct=*/3};
410 PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
411 /*pointersPerStruct=*/0};
412 PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
413 /*pointersPerStruct=*/2};
414 PointerListInfo classProps = {objc::symbol_names::klassPropList,
415 /*pointersPerStruct=*/2};
418 public:
419 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
420 void doMerge();
421 static void doCleanup();
423 private:
424 DenseSet<const Symbol *> collectNlCategories();
425 void collectAndValidateCategoriesData();
426 bool
427 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
429 void eraseISec(ConcatInputSection *isec);
430 void eraseMergedCategories();
432 void generateCatListForNonErasedCategories(
433 MapVector<ConcatInputSection *, std::set<uint64_t>>
434 catListToErasedOffsets);
435 void collectSectionWriteInfoFromIsec(const InputSection *isec,
436 InfoWriteSection &catWriteInfo);
437 bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
438 bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
439 ClassExtensionInfo &extInfo);
441 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
442 PointerListInfo &ptrList,
443 SourceLanguage sourceLang);
445 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
446 uint32_t secOffset,
447 SourceLanguage sourceLang);
449 bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
450 PointerListInfo &ptrList);
452 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
453 const ClassExtensionInfo &extInfo,
454 const PointerListInfo &ptrList);
456 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
457 const ClassExtensionInfo &extInfo,
458 const PointerListInfo &ptrList);
460 Defined *emitCategory(const ClassExtensionInfo &extInfo);
461 Defined *emitCatListEntrySec(const std::string &forCategoryName,
462 const std::string &forBaseClassName,
463 ObjFile *objFile);
464 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
465 const Symbol *baseClassSym,
466 const std::string &baseClassName, ObjFile *objFile);
467 Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
468 void createSymbolReference(Defined *refFrom, const Symbol *refTo,
469 uint32_t offset, const Reloc &relocTemplate);
470 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
471 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
472 uint32_t offset);
473 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
474 uint32_t offset);
475 Defined *getClassRo(const Defined *classSym, bool getMetaRo);
476 SourceLanguage getClassSymSourceLang(const Defined *classSym);
477 bool mergeCategoriesIntoBaseClass(const Defined *baseClass,
478 std::vector<InfoInputCategory> &categories);
479 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
480 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
481 uint32_t offset);
483 // Allocate a null-terminated StringRef backed by generatedSectionData
484 StringRef newStringData(const char *str);
485 // Allocate section data, backed by generatedSectionData
486 SmallVector<uint8_t> &newSectionData(uint32_t size);
488 CategoryLayout catLayout;
489 ClassLayout classLayout;
490 ROClassLayout roClassLayout;
491 ListHeaderLayout listHeaderLayout;
492 MethodLayout methodLayout;
493 ProtocolListHeaderLayout protocolListHeaderLayout;
495 InfoCategoryWriter infoCategoryWriter;
496 std::vector<ConcatInputSection *> &allInputSections;
497 // Map of base class Symbol to list of InfoInputCategory's for it
498 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
500 // Normally, the binary data comes from the input files, but since we're
501 // generating binary data ourselves, we use the below array to store it in.
502 // Need this to be 'static' so the data survives past the ObjcCategoryMerger
503 // object, as the data will be read by the Writer when the final binary is
504 // generated.
505 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
506 generatedSectionData;
509 SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
510 ObjcCategoryMerger::generatedSectionData;
512 ObjcCategoryMerger::ObjcCategoryMerger(
513 std::vector<ConcatInputSection *> &_allInputSections)
514 : catLayout(target->wordSize), classLayout(target->wordSize),
515 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
516 methodLayout(target->wordSize),
517 protocolListHeaderLayout(target->wordSize),
518 allInputSections(_allInputSections) {}
520 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
521 const InputSection *isec, InfoWriteSection &catWriteInfo) {
523 catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
524 catWriteInfo.align = isec->align;
525 catWriteInfo.outputSection = isec->parent;
527 assert(catWriteInfo.outputSection &&
528 "outputSection may not be null in collectSectionWriteInfoFromIsec.");
530 if (isec->relocs.size())
531 catWriteInfo.relocTemplate = isec->relocs[0];
533 catWriteInfo.valid = true;
536 Symbol *
537 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
538 uint32_t offset) {
539 if (!isec)
540 return nullptr;
541 const Reloc *reloc = isec->getRelocAt(offset);
543 if (!reloc)
544 return nullptr;
546 Symbol *sym = reloc->referent.dyn_cast<Symbol *>();
548 if (reloc->addend && sym) {
549 assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
550 Defined *definedSym = cast<Defined>(sym);
551 sym = tryFindDefinedOnIsec(definedSym->isec(),
552 definedSym->value + reloc->addend);
555 return sym;
558 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
559 uint32_t offset) {
560 for (Defined *sym : isec->symbols)
561 if ((sym->value <= offset) && (sym->value + sym->size > offset))
562 return sym;
564 return nullptr;
567 Defined *
568 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
569 uint32_t offset) {
570 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
571 return dyn_cast_or_null<Defined>(sym);
574 // Get the class's ro_data symbol. If getMetaRo is true, then we will return
575 // the meta-class's ro_data symbol. Otherwise, we will return the class
576 // (instance) ro_data symbol.
577 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
578 bool getMetaRo) {
579 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
580 if (!isec)
581 return nullptr;
583 if (!getMetaRo)
584 return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
585 classSym->value);
587 Defined *metaClass = tryGetDefinedAtIsecOffset(
588 isec, classLayout.metaClassOffset + classSym->value);
589 if (!metaClass)
590 return nullptr;
592 return tryGetDefinedAtIsecOffset(
593 dyn_cast<ConcatInputSection>(metaClass->isec()),
594 classLayout.roDataOffset);
597 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
598 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
599 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
600 const ConcatInputSection *isec, uint32_t offset) {
601 const Reloc *reloc = isec->getRelocAt(offset);
603 if (!reloc)
604 return;
606 Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
607 if (!sym)
608 return;
610 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
611 eraseISec(cisec);
612 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
613 uint32_t totalOffset = sym->value + reloc->addend;
614 StringPiece &piece = csisec->getStringPiece(totalOffset);
615 piece.live = false;
616 } else {
617 llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
621 bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
622 const InfoInputCategory &catInfo) {
624 if (!infoCategoryWriter.catListInfo.valid)
625 collectSectionWriteInfoFromIsec(catInfo.catListIsec,
626 infoCategoryWriter.catListInfo);
627 if (!infoCategoryWriter.catBodyInfo.valid)
628 collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
629 infoCategoryWriter.catBodyInfo);
631 if (!infoCategoryWriter.catNameInfo.valid) {
632 lld::macho::Defined *catNameSym =
633 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
635 if (!catNameSym) {
636 // This is an unhandeled case where the category name is not a symbol but
637 // instead points to an CStringInputSection (that doesn't have any symbol)
638 // TODO: Find a small repro and either fix or add a test case for this
639 // scenario
640 return false;
643 collectSectionWriteInfoFromIsec(catNameSym->isec(),
644 infoCategoryWriter.catNameInfo);
647 // Collect writer info from all the category lists (we're assuming they all
648 // would provide the same info)
649 if (!infoCategoryWriter.catPtrListInfo.valid) {
650 for (uint32_t off = catLayout.instanceMethodsOffset;
651 off <= catLayout.classPropsOffset; off += target->wordSize) {
652 if (Defined *ptrList =
653 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
654 collectSectionWriteInfoFromIsec(ptrList->isec(),
655 infoCategoryWriter.catPtrListInfo);
656 // we've successfully collected data, so we can break
657 break;
662 return true;
665 // Parse a protocol list that might be linked to ConcatInputSection at a given
666 // offset. The format of the protocol list is different than other lists (prop
667 // lists, method lists) so we need to parse it differently
668 void ObjcCategoryMerger::parseProtocolListInfo(
669 const ConcatInputSection *isec, uint32_t secOffset,
670 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
671 assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
672 "Tried to read pointer list beyond protocol section end");
674 const Reloc *reloc = isec->getRelocAt(secOffset);
675 if (!reloc)
676 return;
678 auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
679 assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
681 // Theoretically protocol count can be either 32b or 64b, depending on
682 // platform pointer size, but to simplify implementation we always just read
683 // the lower 32b which should be good enough.
684 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
685 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
687 ptrList.structCount += protocolCount;
688 ptrList.structSize = target->wordSize;
690 [[maybe_unused]] uint32_t expectedListSize =
691 (protocolCount * target->wordSize) +
692 /*header(count)*/ protocolListHeaderLayout.totalSize +
693 /*extra null value*/ target->wordSize;
695 // On Swift, the protocol list does not have the extra (unnecessary) null
696 [[maybe_unused]] uint32_t expectedListSizeSwift =
697 expectedListSize - target->wordSize;
699 assert(((expectedListSize == ptrListSym->isec()->data.size() &&
700 sourceLang == SourceLanguage::ObjC) ||
701 (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
702 sourceLang == SourceLanguage::Swift)) &&
703 "Protocol list does not match expected size");
705 uint32_t off = protocolListHeaderLayout.totalSize;
706 for (uint32_t inx = 0; inx < protocolCount; ++inx) {
707 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
708 assert(reloc && "No reloc found at protocol list offset");
710 auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
711 assert(listSym && "Protocol list reloc does not have a valid Defined");
713 ptrList.allPtrs.push_back(listSym);
714 off += target->wordSize;
716 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
717 "expected null terminating protocol");
718 assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
719 "Protocol list end offset does not match expected size");
722 // Parse a protocol list and return the PointerListInfo for it
723 ObjcCategoryMerger::PointerListInfo
724 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
725 uint32_t secOffset,
726 SourceLanguage sourceLang) {
727 PointerListInfo ptrList;
728 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
729 return ptrList;
732 // Parse a pointer list that might be linked to ConcatInputSection at a given
733 // offset. This can be used for instance methods, class methods, instance props
734 // and class props since they have the same format.
735 bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
736 uint32_t secOffset,
737 PointerListInfo &ptrList) {
738 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
739 assert(isec && "Trying to parse pointer list from null isec");
740 assert(secOffset + target->wordSize <= isec->data.size() &&
741 "Trying to read pointer list beyond section end");
743 const Reloc *reloc = isec->getRelocAt(secOffset);
744 // Empty list is a valid case, return true.
745 if (!reloc)
746 return true;
748 auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
749 assert(ptrListSym && "Reloc does not have a valid Defined");
751 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
752 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
753 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
754 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
755 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
757 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
759 ptrList.structCount += thisStructCount;
760 ptrList.structSize = thisStructSize;
762 uint32_t expectedListSize =
763 listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
764 assert(expectedListSize == ptrListSym->isec()->data.size() &&
765 "Pointer list does not match expected size");
767 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
768 off += target->wordSize) {
769 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
770 assert(reloc && "No reloc found at pointer list offset");
772 auto *listSym =
773 dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>());
774 // Sometimes, the reloc points to a StringPiece (InputSection + addend)
775 // instead of a symbol.
776 // TODO: Skip these cases for now, but we should fix this.
777 if (!listSym)
778 return false;
780 ptrList.allPtrs.push_back(listSym);
783 return true;
786 // Here we parse all the information of an input category (catInfo) and
787 // append the parsed info into the structure which will contain all the
788 // information about how a class is extended (extInfo)
789 bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
790 ClassExtensionInfo &extInfo) {
791 const Reloc *catNameReloc =
792 catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
794 // Parse name
795 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
797 // is this the first category we are parsing?
798 if (extInfo.mergedContainerName.empty())
799 extInfo.objFileForMergeData =
800 dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
801 else
802 extInfo.mergedContainerName += "|";
804 assert(extInfo.objFileForMergeData &&
805 "Expected to already have valid objextInfo.objFileForMergeData");
807 StringRef catName = catNameReloc->getReferentString();
808 extInfo.mergedContainerName += catName.str();
810 // Parse base class
811 if (!extInfo.baseClass) {
812 Symbol *classSym =
813 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
814 assert(extInfo.baseClassName.empty());
815 extInfo.baseClass = classSym;
816 llvm::StringRef classPrefix(objc::symbol_names::klass);
817 assert(classSym->getName().starts_with(classPrefix) &&
818 "Base class symbol does not start with expected prefix");
819 extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
820 } else {
821 assert((extInfo.baseClass ==
822 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
823 catLayout.klassOffset)) &&
824 "Trying to parse category info into container with different base "
825 "class");
828 if (!parsePointerListInfo(catInfo.catBodyIsec,
829 catLayout.instanceMethodsOffset,
830 extInfo.instanceMethods))
831 return false;
833 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
834 extInfo.classMethods))
835 return false;
837 parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
838 extInfo.protocols, catInfo.sourceLanguage);
840 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
841 extInfo.instanceProps))
842 return false;
844 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
845 extInfo.classProps))
846 return false;
848 return true;
851 // Generate a protocol list (including header) and link it into the parent at
852 // the specified offset.
853 Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
854 Defined *parentSym, uint32_t linkAtOffset,
855 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
856 if (ptrList.allPtrs.empty())
857 return nullptr;
859 assert(ptrList.allPtrs.size() == ptrList.structCount);
861 uint32_t bodySize = (ptrList.structCount * target->wordSize) +
862 /*header(count)*/ protocolListHeaderLayout.totalSize +
863 /*extra null value*/ target->wordSize;
864 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
866 // This theoretically can be either 32b or 64b, but writing just the first 32b
867 // is good enough
868 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
869 bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
871 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
873 ConcatInputSection *listSec = make<ConcatInputSection>(
874 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
875 infoCategoryWriter.catPtrListInfo.align);
876 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
877 listSec->live = true;
879 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
881 std::string symName = ptrList.categoryPrefix;
882 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
884 Defined *ptrListSym = make<Defined>(
885 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
886 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
887 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
888 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
889 /*isWeakDefCanBeHidden=*/false);
891 ptrListSym->used = true;
892 parentSym->getObjectFile()->symbols.push_back(ptrListSym);
893 addInputSection(listSec);
895 createSymbolReference(parentSym, ptrListSym, linkAtOffset,
896 infoCategoryWriter.catBodyInfo.relocTemplate);
898 uint32_t offset = protocolListHeaderLayout.totalSize;
899 for (Symbol *symbol : ptrList.allPtrs) {
900 createSymbolReference(ptrListSym, symbol, offset,
901 infoCategoryWriter.catPtrListInfo.relocTemplate);
902 offset += target->wordSize;
905 return ptrListSym;
908 // Generate a pointer list (including header) and link it into the parent at the
909 // specified offset. This is used for instance and class methods and
910 // proprieties.
911 void ObjcCategoryMerger::emitAndLinkPointerList(
912 Defined *parentSym, uint32_t linkAtOffset,
913 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
914 if (ptrList.allPtrs.empty())
915 return;
917 assert(ptrList.allPtrs.size() * target->wordSize ==
918 ptrList.structCount * ptrList.structSize);
920 // Generate body
921 uint32_t bodySize =
922 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
923 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
925 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
926 bodyData.data() + listHeaderLayout.structSizeOffset);
927 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
928 bodyData.data() + listHeaderLayout.structCountOffset);
930 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
931 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
933 ConcatInputSection *listSec = make<ConcatInputSection>(
934 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
935 infoCategoryWriter.catPtrListInfo.align);
936 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
937 listSec->live = true;
939 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
941 std::string symName = ptrList.categoryPrefix;
942 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
944 Defined *ptrListSym = make<Defined>(
945 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
946 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
947 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
948 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
949 /*isWeakDefCanBeHidden=*/false);
951 ptrListSym->used = true;
952 parentSym->getObjectFile()->symbols.push_back(ptrListSym);
953 addInputSection(listSec);
955 createSymbolReference(parentSym, ptrListSym, linkAtOffset,
956 infoCategoryWriter.catBodyInfo.relocTemplate);
958 uint32_t offset = listHeaderLayout.totalSize;
959 for (Symbol *symbol : ptrList.allPtrs) {
960 createSymbolReference(ptrListSym, symbol, offset,
961 infoCategoryWriter.catPtrListInfo.relocTemplate);
962 offset += target->wordSize;
966 // This method creates an __objc_catlist ConcatInputSection with a single slot
967 Defined *
968 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
969 const std::string &forBaseClassName,
970 ObjFile *objFile) {
971 uint32_t sectionSize = target->wordSize;
972 llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
974 ConcatInputSection *newCatList =
975 make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
976 bodyData, infoCategoryWriter.catListInfo.align);
977 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
978 newCatList->live = true;
980 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
982 std::string catSymName = "<__objc_catlist slot for merged category ";
983 catSymName += forBaseClassName + "(" + forCategoryName + ")>";
985 Defined *catListSym = make<Defined>(
986 newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
987 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
988 /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
989 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
990 /*isWeakDefCanBeHidden=*/false);
992 catListSym->used = true;
993 objFile->symbols.push_back(catListSym);
994 addInputSection(newCatList);
995 return catListSym;
998 // Here we generate the main category body and link the name and base class into
999 // it. We don't link any other info yet like the protocol and class/instance
1000 // methods/props.
1001 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1002 const Defined *nameSym,
1003 const Symbol *baseClassSym,
1004 const std::string &baseClassName,
1005 ObjFile *objFile) {
1006 llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
1008 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1009 catLayout.sizeOffset);
1010 *ptrSize = catLayout.totalSize;
1012 ConcatInputSection *newBodySec =
1013 make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
1014 bodyData, infoCategoryWriter.catBodyInfo.align);
1015 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1016 newBodySec->live = true;
1018 std::string symName =
1019 objc::symbol_names::category + baseClassName + "(" + name + ")";
1020 Defined *catBodySym = make<Defined>(
1021 newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
1022 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1023 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1024 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1025 /*isWeakDefCanBeHidden=*/false);
1027 catBodySym->used = true;
1028 objFile->symbols.push_back(catBodySym);
1029 addInputSection(newBodySec);
1031 createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
1032 infoCategoryWriter.catBodyInfo.relocTemplate);
1034 // Create a reloc to the base class (either external or internal)
1035 createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
1036 infoCategoryWriter.catBodyInfo.relocTemplate);
1038 return catBodySym;
1041 // This writes the new category name (for the merged category) into the binary
1042 // and returns the sybmol for it.
1043 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1044 ObjFile *objFile) {
1045 StringRef nameStrData = newStringData(name.c_str());
1046 // We use +1 below to include the null terminator
1047 llvm::ArrayRef<uint8_t> nameData(
1048 reinterpret_cast<const uint8_t *>(nameStrData.data()),
1049 nameStrData.size() + 1);
1051 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1052 CStringInputSection *newStringSec = make<CStringInputSection>(
1053 *infoCategoryWriter.catNameInfo.inputSection, nameData,
1054 infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
1056 parentSection->subsections.push_back({0, newStringSec});
1058 newStringSec->splitIntoPieces();
1059 newStringSec->pieces[0].live = true;
1060 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1061 in.cStringSection->addInput(newStringSec);
1062 assert(newStringSec->pieces.size() == 1);
1064 Defined *catNameSym = make<Defined>(
1065 "<merged category name>", /*file=*/objFile, newStringSec,
1066 /*value=*/0, nameData.size(),
1067 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1068 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1069 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1071 catNameSym->used = true;
1072 objFile->symbols.push_back(catNameSym);
1073 return catNameSym;
1076 // This method fully creates a new category from the given ClassExtensionInfo.
1077 // It creates the category name, body and method/protocol/prop lists and links
1078 // them all together. Then it creates a new __objc_catlist entry and adds the
1079 // category to it. Calling this method will fully generate a category which will
1080 // be available in the final binary.
1081 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1082 Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
1083 extInfo.objFileForMergeData);
1085 Defined *catBodySym = emitCategoryBody(
1086 extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
1087 extInfo.baseClassName, extInfo.objFileForMergeData);
1089 Defined *catListSym =
1090 emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
1091 extInfo.objFileForMergeData);
1093 // Add the single category body to the category list at the offset 0.
1094 createSymbolReference(catListSym, catBodySym, /*offset=*/0,
1095 infoCategoryWriter.catListInfo.relocTemplate);
1097 emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
1098 extInfo.instanceMethods);
1100 emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
1101 extInfo.classMethods);
1103 emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
1104 extInfo.protocols);
1106 emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
1107 extInfo.instanceProps);
1109 emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
1110 extInfo.classProps);
1112 return catBodySym;
1115 // This method merges all the categories (sharing a base class) into a single
1116 // category.
1117 bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1118 std::vector<InfoInputCategory> &categories) {
1119 assert(categories.size() > 1 && "Expected at least 2 categories");
1121 ClassExtensionInfo extInfo(catLayout);
1123 for (auto &catInfo : categories)
1124 if (!parseCatInfoToExtInfo(catInfo, extInfo))
1125 return false;
1127 Defined *newCatDef = emitCategory(extInfo);
1128 assert(newCatDef && "Failed to create a new category");
1130 // Suppress unsuded var warning
1131 (void)newCatDef;
1133 for (auto &catInfo : categories)
1134 catInfo.wasMerged = true;
1136 return true;
1139 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1140 const Symbol *refTo,
1141 uint32_t offset,
1142 const Reloc &relocTemplate) {
1143 Reloc r = relocTemplate;
1144 r.offset = offset;
1145 r.addend = 0;
1146 r.referent = const_cast<Symbol *>(refTo);
1147 refFrom->isec()->relocs.push_back(r);
1150 // Get the list of categories in the '__objc_nlcatlist' section. We can't
1151 // optimize these as they have a '+load' method that has to be called at
1152 // runtime.
1153 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1154 DenseSet<const Symbol *> nlCategories;
1156 for (InputSection *sec : allInputSections) {
1157 if (sec->getName() != section_names::objcNonLazyCatList)
1158 continue;
1160 for (auto &r : sec->relocs) {
1161 const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1162 nlCategories.insert(sym);
1165 return nlCategories;
1168 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1169 auto nlCategories = collectNlCategories();
1171 for (InputSection *sec : allInputSections) {
1172 if (sec->getName() != section_names::objcCatList)
1173 continue;
1174 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
1175 assert(catListCisec &&
1176 "__objc_catList InputSection is not a ConcatInputSection");
1178 for (uint32_t off = 0; off < catListCisec->getSize();
1179 off += target->wordSize) {
1180 Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
1181 assert(categorySym &&
1182 "Failed to get a valid category at __objc_catlit offset");
1184 if (nlCategories.count(categorySym))
1185 continue;
1187 auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
1188 assert(catBodyIsec &&
1189 "Category data section is not an ConcatInputSection");
1191 SourceLanguage eLang = SourceLanguage::Unknown;
1192 if (categorySym->getName().starts_with(objc::symbol_names::category))
1193 eLang = SourceLanguage::ObjC;
1194 else if (categorySym->getName().starts_with(
1195 objc::symbol_names::swift_objc_category))
1196 eLang = SourceLanguage::Swift;
1197 else
1198 llvm_unreachable("Unexpected category symbol name");
1200 InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1202 // Check that the category has a reloc at 'klassOffset' (which is
1203 // a pointer to the class symbol)
1205 Symbol *classSym =
1206 tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
1207 assert(classSym && "Category does not have a valid base class");
1209 if (!collectCategoryWriterInfoFromCategory(catInputInfo))
1210 continue;
1212 categoryMap[classSym].push_back(catInputInfo);
1217 // In the input we have multiple __objc_catlist InputSection, each of which may
1218 // contain links to multiple categories. Of these categories, we will merge (and
1219 // erase) only some. There will be some categories that will remain untouched
1220 // (not erased). For these not erased categories, we generate new __objc_catlist
1221 // entries since the parent __objc_catlist entry will be erased
1222 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1223 const MapVector<ConcatInputSection *, std::set<uint64_t>>
1224 catListToErasedOffsets) {
1226 // Go through all offsets of all __objc_catlist's that we process and if there
1227 // are categories that we didn't process - generate a new __objc_catlist for
1228 // each.
1229 for (auto &mapEntry : catListToErasedOffsets) {
1230 ConcatInputSection *catListIsec = mapEntry.first;
1231 for (uint32_t catListIsecOffset = 0;
1232 catListIsecOffset < catListIsec->data.size();
1233 catListIsecOffset += target->wordSize) {
1234 // This slot was erased, we can just skip it
1235 if (mapEntry.second.count(catListIsecOffset))
1236 continue;
1238 Defined *nonErasedCatBody =
1239 tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
1240 assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1242 // Allocate data for the new __objc_catlist slot
1243 llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
1245 // We mark the __objc_catlist slot as belonging to the same file as the
1246 // category
1247 ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
1249 ConcatInputSection *listSec = make<ConcatInputSection>(
1250 *infoCategoryWriter.catListInfo.inputSection, bodyData,
1251 infoCategoryWriter.catListInfo.align);
1252 listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1253 listSec->live = true;
1255 std::string slotSymName = "<__objc_catlist slot for category ";
1256 slotSymName += nonErasedCatBody->getName();
1257 slotSymName += ">";
1259 Defined *catListSlotSym = make<Defined>(
1260 newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
1261 /*value=*/0, bodyData.size(),
1262 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1263 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1264 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1266 catListSlotSym->used = true;
1267 objFile->symbols.push_back(catListSlotSym);
1268 addInputSection(listSec);
1270 // Now link the category body into the newly created slot
1271 createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
1272 infoCategoryWriter.catListInfo.relocTemplate);
1277 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1278 isec->live = false;
1279 for (auto &sym : isec->symbols)
1280 sym->used = false;
1283 // This fully erases the merged categories, including their body, their names,
1284 // their method/protocol/prop lists and the __objc_catlist entries that link to
1285 // them.
1286 void ObjcCategoryMerger::eraseMergedCategories() {
1287 // Map of InputSection to a set of offsets of the categories that were merged
1288 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1290 for (auto &mapEntry : categoryMap) {
1291 for (InfoInputCategory &catInfo : mapEntry.second) {
1292 if (catInfo.wasMerged) {
1293 eraseISec(catInfo.catListIsec);
1294 catListToErasedOffsets[catInfo.catListIsec].insert(
1295 catInfo.offCatListIsec);
1300 // If there were categories that we did not erase, we need to generate a new
1301 // __objc_catList that contains only the un-merged categories, and get rid of
1302 // the references to the ones we merged.
1303 generateCatListForNonErasedCategories(catListToErasedOffsets);
1305 // Erase the old method lists & names of the categories that were merged
1306 for (auto &mapEntry : categoryMap) {
1307 for (InfoInputCategory &catInfo : mapEntry.second) {
1308 if (!catInfo.wasMerged)
1309 continue;
1311 eraseISec(catInfo.catBodyIsec);
1313 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1314 // categories because the name will sometimes also be used for other
1315 // purposes.
1316 // For Swift, see usages of 'l_.str.11.SimpleClass' in
1317 // objc-category-merging-swift.s
1318 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1319 // objc-category-merging-erase-objc-name-test.s
1320 // TODO: handle the above in a smarter way
1322 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1323 catLayout.instanceMethodsOffset);
1324 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1325 catLayout.classMethodsOffset);
1326 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1327 catLayout.protocolsOffset);
1328 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1329 catLayout.classPropsOffset);
1330 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1331 catLayout.instancePropsOffset);
1336 void ObjcCategoryMerger::doMerge() {
1337 collectAndValidateCategoriesData();
1339 for (auto &[baseClass, catInfos] : categoryMap) {
1340 bool merged = false;
1341 if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1342 // Merge all categories into the base class
1343 merged = mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1344 } else if (catInfos.size() > 1) {
1345 // Merge all categories into a new, single category
1346 merged = mergeCategoriesIntoSingleCategory(catInfos);
1348 if (!merged)
1349 warn("ObjC category merging skipped for class symbol' " +
1350 baseClass->getName().str() + "'\n");
1353 // Erase all categories that were merged
1354 eraseMergedCategories();
1357 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1359 StringRef ObjcCategoryMerger::newStringData(const char *str) {
1360 uint32_t len = strlen(str);
1361 uint32_t bufSize = len + 1;
1362 SmallVector<uint8_t> &data = newSectionData(bufSize);
1363 char *strData = reinterpret_cast<char *>(data.data());
1364 // Copy the string chars and null-terminator
1365 memcpy(strData, str, bufSize);
1366 return StringRef(strData, len);
1369 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1370 generatedSectionData.push_back(
1371 std::make_unique<SmallVector<uint8_t>>(size, 0));
1372 return *generatedSectionData.back();
1375 } // namespace
1377 void objc::mergeCategories() {
1378 TimeTraceScope timeScope("ObjcCategoryMerger");
1380 ObjcCategoryMerger merger(inputSections);
1381 merger.doMerge();
1384 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1386 ObjcCategoryMerger::SourceLanguage
1387 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1388 if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1389 return SourceLanguage::Swift;
1391 // If the symbol name matches the ObjC prefix, we don't necessarely know this
1392 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1393 // classes. Ex:
1394 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1395 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1396 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1398 // So we scan for symbols with the same address and check for the Swift class
1399 if (classSym->getName().starts_with(objc::symbol_names::klass)) {
1400 for (auto &sym : classSym->originalIsec->symbols)
1401 if (sym->value == classSym->value)
1402 if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1403 return SourceLanguage::Swift;
1404 return SourceLanguage::ObjC;
1407 llvm_unreachable("Unexpected class symbol name during category merging");
1410 bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1411 const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1412 assert(categories.size() >= 1 && "Expected at least one category to merge");
1414 // Collect all the info from the categories
1415 ClassExtensionInfo extInfo(catLayout);
1416 extInfo.baseClass = baseClass;
1417 extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
1419 for (auto &catInfo : categories)
1420 if (!parseCatInfoToExtInfo(catInfo, extInfo))
1421 return false;
1423 // Get metadata for the base class
1424 Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1425 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1426 Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1427 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1429 // Now collect the info from the base class from the various lists in the
1430 // class metadata
1432 // Protocol lists are a special case - the same protocol list is in classRo
1433 // and metaRo, so we only need to parse it once
1434 parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1435 extInfo.protocols, extInfo.baseClassSourceLanguage);
1437 // Check that the classRo and metaRo protocol lists are identical
1438 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1439 extInfo.baseClassSourceLanguage) ==
1440 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1441 extInfo.baseClassSourceLanguage) &&
1442 "Category merger expects classRo and metaRo to have the same protocol "
1443 "list");
1445 parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1446 extInfo.classMethods);
1447 parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1448 extInfo.instanceMethods);
1450 parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1451 extInfo.classProps);
1452 parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1453 extInfo.instanceProps);
1455 // Erase the old lists - these will be generated and replaced
1456 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1457 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1458 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1459 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1460 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1461 eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1463 // Emit the newly merged lists - first into the meta RO then into the class RO
1464 // First we emit and link the protocol list into the meta RO. Then we link it
1465 // in the classRo as well (they're supposed to be identical)
1466 if (Defined *protoListSym =
1467 emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1468 extInfo, extInfo.protocols)) {
1469 createSymbolReference(classRo, protoListSym,
1470 roClassLayout.baseProtocolsOffset,
1471 infoCategoryWriter.catBodyInfo.relocTemplate);
1474 emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1475 extInfo.classMethods);
1476 emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1477 extInfo.instanceMethods);
1479 emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1480 extInfo.classProps);
1482 emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1483 extInfo.instanceProps);
1485 // Mark all the categories as merged - this will be used to erase them later
1486 for (auto &catInfo : categories)
1487 catInfo.wasMerged = true;
1489 return true;
1492 // Erase the symbol at a given offset in an InputSection
1493 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1494 uint32_t offset) {
1495 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1496 if (!sym)
1497 return;
1499 // Remove the symbol from isec->symbols
1500 assert(isa<Defined>(sym) && "Can only erase a Defined");
1501 llvm::erase(isec->symbols, sym);
1503 // Remove the relocs that refer to this symbol
1504 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1505 llvm::erase_if(isec->relocs, removeAtOff);
1507 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1508 // the whole ConcatInputSection
1509 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1510 if (cisec->data.size() == sym->size)
1511 eraseISec(cisec);