[InstCombine] Signed saturation tests. NFC
[llvm-complete.git] / lib / Transforms / IPO / LowerTypeTests.cpp
blob2dec366d70e264a6887730348f5762c2e5d77355
1 //===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass lowers type metadata and calls to the llvm.type.test intrinsic.
10 // It also ensures that globals are properly laid out for the
11 // llvm.icall.branch.funnel intrinsic.
12 // See http://llvm.org/docs/TypeMetadata.html for more information.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/IPO/LowerTypeTests.h"
17 #include "llvm/ADT/APInt.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/EquivalenceClasses.h"
21 #include "llvm/ADT/PointerUnion.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/TinyPtrVector.h"
27 #include "llvm/ADT/Triple.h"
28 #include "llvm/Analysis/TypeMetadataUtils.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalAlias.h"
38 #include "llvm/IR/GlobalObject.h"
39 #include "llvm/IR/GlobalValue.h"
40 #include "llvm/IR/GlobalVariable.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/InlineAsm.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/LLVMContext.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/ModuleSummaryIndex.h"
50 #include "llvm/IR/ModuleSummaryIndexYAML.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Use.h"
55 #include "llvm/IR/User.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/Pass.h"
58 #include "llvm/Support/Allocator.h"
59 #include "llvm/Support/Casting.h"
60 #include "llvm/Support/CommandLine.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/Error.h"
63 #include "llvm/Support/ErrorHandling.h"
64 #include "llvm/Support/FileSystem.h"
65 #include "llvm/Support/MathExtras.h"
66 #include "llvm/Support/MemoryBuffer.h"
67 #include "llvm/Support/TrailingObjects.h"
68 #include "llvm/Support/YAMLTraits.h"
69 #include "llvm/Support/raw_ostream.h"
70 #include "llvm/Transforms/IPO.h"
71 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
72 #include "llvm/Transforms/Utils/ModuleUtils.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstdint>
76 #include <memory>
77 #include <set>
78 #include <string>
79 #include <system_error>
80 #include <utility>
81 #include <vector>
83 using namespace llvm;
84 using namespace lowertypetests;
86 #define DEBUG_TYPE "lowertypetests"
// Pass-wide counters registered through the STATISTIC macro. The string
// attached to each counter is its human-readable description.
// ByteArraySizeBits / ByteArraySizeBytes are assigned at the end of
// allocateByteArrays().
88 STATISTIC(ByteArraySizeBits, "Byte array size in bits");
89 STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
90 STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
91 STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
92 STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");
// Hidden command-line options controlling this pass.

// When set (the default), createBitSetTest() wraps every use of a byte array
// in its own private alias instead of referencing the array directly.
94 static cl::opt<bool> AvoidReuse(
95 "lowertypetests-avoid-reuse",
96 cl::desc("Try to avoid reuse of byte array addresses using aliases"),
97 cl::Hidden, cl::init(true));
// Selects what is done with the module summary: nothing, import or export.
// NOTE(review): presumably consumed by runForTesting(), which is not visible
// in this part of the file -- confirm.
99 static cl::opt<PassSummaryAction> ClSummaryAction(
100 "lowertypetests-summary-action",
101 cl::desc("What to do with the summary when running this pass"),
102 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
103 clEnumValN(PassSummaryAction::Import, "import",
104 "Import typeid resolutions from summary and globals"),
105 clEnumValN(PassSummaryAction::Export, "export",
106 "Export typeid resolutions to summary and globals")),
107 cl::Hidden);
// Path of a YAML summary file to read before the pass runs.
109 static cl::opt<std::string> ClReadSummary(
110 "lowertypetests-read-summary",
111 cl::desc("Read summary from given YAML file before running pass"),
112 cl::Hidden);
// Path of a YAML summary file to write after the pass has run.
114 static cl::opt<std::string> ClWriteSummary(
115 "lowertypetests-write-summary",
116 cl::desc("Write summary to given YAML file after running pass"),
117 cl::Hidden);
119 bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
120 if (Offset < ByteOffset)
121 return false;
123 if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
124 return false;
126 uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
127 if (BitOffset >= BitSize)
128 return false;
130 return Bits.count(BitOffset);
133 void BitSetInfo::print(raw_ostream &OS) const {
134 OS << "offset " << ByteOffset << " size " << BitSize << " align "
135 << (1 << AlignLog2);
137 if (isAllOnes()) {
138 OS << " all-ones\n";
139 return;
142 OS << " { ";
143 for (uint64_t B : Bits)
144 OS << B << ' ';
145 OS << "}\n";
148 BitSetInfo BitSetBuilder::build() {
149 if (Min > Max)
150 Min = 0;
152 // Normalize each offset against the minimum observed offset, and compute
153 // the bitwise OR of each of the offsets. The number of trailing zeros
154 // in the mask gives us the log2 of the alignment of all offsets, which
155 // allows us to compress the bitset by only storing one bit per aligned
156 // address.
157 uint64_t Mask = 0;
158 for (uint64_t &Offset : Offsets) {
159 Offset -= Min;
160 Mask |= Offset;
163 BitSetInfo BSI;
164 BSI.ByteOffset = Min;
166 BSI.AlignLog2 = 0;
167 if (Mask != 0)
168 BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
170 // Build the compressed bitset while normalizing the offsets against the
171 // computed alignment.
172 BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
173 for (uint64_t Offset : Offsets) {
174 Offset >>= BSI.AlignLog2;
175 BSI.Bits.insert(Offset);
178 return BSI;
181 void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
182 // Create a new fragment to hold the layout for F.
183 Fragments.emplace_back();
184 std::vector<uint64_t> &Fragment = Fragments.back();
185 uint64_t FragmentIndex = Fragments.size() - 1;
187 for (auto ObjIndex : F) {
188 uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
189 if (OldFragmentIndex == 0) {
190 // We haven't seen this object index before, so just add it to the current
191 // fragment.
192 Fragment.push_back(ObjIndex);
193 } else {
194 // This index belongs to an existing fragment. Copy the elements of the
195 // old fragment into this one and clear the old fragment. We don't update
196 // the fragment map just yet, this ensures that any further references to
197 // indices from the old fragment in this fragment do not insert any more
198 // indices.
199 std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
200 Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
201 OldFragment.clear();
205 // Update the fragment map to point our object indices to this fragment.
206 for (uint64_t ObjIndex : Fragment)
207 FragmentMap[ObjIndex] = FragmentIndex;
210 void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
211 uint64_t BitSize, uint64_t &AllocByteOffset,
212 uint8_t &AllocMask) {
213 // Find the smallest current allocation.
214 unsigned Bit = 0;
215 for (unsigned I = 1; I != BitsPerByte; ++I)
216 if (BitAllocs[I] < BitAllocs[Bit])
217 Bit = I;
219 AllocByteOffset = BitAllocs[Bit];
221 // Add our size to it.
222 unsigned ReqSize = AllocByteOffset + BitSize;
223 BitAllocs[Bit] = ReqSize;
224 if (Bytes.size() < ReqSize)
225 Bytes.resize(ReqSize);
227 // Set our bits.
228 AllocMask = 1 << Bit;
229 for (uint64_t B : Bits)
230 Bytes[AllocByteOffset + B] |= AllocMask;
233 bool lowertypetests::isJumpTableCanonical(Function *F) {
234 if (F->isDeclarationForLinker())
235 return false;
236 auto *CI = mdconst::extract_or_null<ConstantInt>(
237 F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
238 if (!CI || CI->getZExtValue() != 0)
239 return true;
240 return F->hasFnAttribute("cfi-canonical-jump-table");
243 namespace {
245 struct ByteArrayInfo {
246 std::set<uint64_t> Bits;
247 uint64_t BitSize;
248 GlobalVariable *ByteArray;
249 GlobalVariable *MaskGlobal;
250 uint8_t *MaskPtr = nullptr;
253 /// A POD-like structure that we use to store a global reference together with
254 /// its metadata types. In this pass we frequently need to query the set of
255 /// metadata types referenced by a global, which at the IR level is an expensive
256 /// operation involving a map lookup; this data structure helps to reduce the
257 /// number of times we need to do this lookup.
258 class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
259 friend TrailingObjects;
261 GlobalObject *GO;
262 size_t NTypes;
264 // For functions: true if the jump table is canonical. This essentially means
265 // whether the canonical address (i.e. the symbol table entry) of the function
266 // is provided by the local jump table. This is normally the same as whether
267 // the function is defined locally, but if canonical jump tables are disabled
268 // by the user then the jump table never provides a canonical definition.
269 bool IsJumpTableCanonical;
271 // For functions: true if this function is either defined or used in a thinlto
272 // module and its jumptable entry needs to be exported to thinlto backends.
273 bool IsExported;
275 size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
277 public:
278 static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
279 bool IsJumpTableCanonical, bool IsExported,
280 ArrayRef<MDNode *> Types) {
281 auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
282 totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
283 GTM->GO = GO;
284 GTM->NTypes = Types.size();
285 GTM->IsJumpTableCanonical = IsJumpTableCanonical;
286 GTM->IsExported = IsExported;
287 std::uninitialized_copy(Types.begin(), Types.end(),
288 GTM->getTrailingObjects<MDNode *>());
289 return GTM;
292 GlobalObject *getGlobal() const {
293 return GO;
296 bool isJumpTableCanonical() const {
297 return IsJumpTableCanonical;
300 bool isExported() const {
301 return IsExported;
304 ArrayRef<MDNode *> types() const {
305 return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes);
309 struct ICallBranchFunnel final
310 : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> {
311 static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI,
312 ArrayRef<GlobalTypeMember *> Targets,
313 unsigned UniqueId) {
314 auto *Call = static_cast<ICallBranchFunnel *>(
315 Alloc.Allocate(totalSizeToAlloc<GlobalTypeMember *>(Targets.size()),
316 alignof(ICallBranchFunnel)));
317 Call->CI = CI;
318 Call->UniqueId = UniqueId;
319 Call->NTargets = Targets.size();
320 std::uninitialized_copy(Targets.begin(), Targets.end(),
321 Call->getTrailingObjects<GlobalTypeMember *>());
322 return Call;
325 CallInst *CI;
326 ArrayRef<GlobalTypeMember *> targets() const {
327 return makeArrayRef(getTrailingObjects<GlobalTypeMember *>(), NTargets);
330 unsigned UniqueId;
332 private:
333 size_t NTargets;
336 struct ScopedSaveAliaseesAndUsed {
337 Module &M;
338 SmallPtrSet<GlobalValue *, 16> Used, CompilerUsed;
339 std::vector<std::pair<GlobalIndirectSymbol *, Function *>> FunctionAliases;
341 ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
342 // The users of this class want to replace all function references except
343 // for aliases and llvm.used/llvm.compiler.used with references to a jump
344 // table. We avoid replacing aliases in order to avoid introducing a double
345 // indirection (or an alias pointing to a declaration in ThinLTO mode), and
346 // we avoid replacing llvm.used/llvm.compiler.used because these global
347 // variables describe properties of the global, not the jump table (besides,
348 // offseted references to the jump table in llvm.used are invalid).
349 // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly
350 // indirect) users", so what we do is save the list of globals referenced by
351 // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
352 // replace the aliasees and then set them back to their original values at
353 // the end.
354 if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false))
355 GV->eraseFromParent();
356 if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
357 GV->eraseFromParent();
359 for (auto &GIS : concat<GlobalIndirectSymbol>(M.aliases(), M.ifuncs())) {
360 // FIXME: This should look past all aliases not just interposable ones,
361 // see discussion on D65118.
362 if (auto *F =
363 dyn_cast<Function>(GIS.getIndirectSymbol()->stripPointerCasts()))
364 FunctionAliases.push_back({&GIS, F});
368 ~ScopedSaveAliaseesAndUsed() {
369 appendToUsed(M, std::vector<GlobalValue *>(Used.begin(), Used.end()));
370 appendToCompilerUsed(M, std::vector<GlobalValue *>(CompilerUsed.begin(),
371 CompilerUsed.end()));
373 for (auto P : FunctionAliases)
374 P.first->setIndirectSymbol(
375 ConstantExpr::getBitCast(P.second, P.first->getType()));
379 class LowerTypeTestsModule {
380 Module &M;
382 ModuleSummaryIndex *ExportSummary;
383 const ModuleSummaryIndex *ImportSummary;
385 Triple::ArchType Arch;
386 Triple::OSType OS;
387 Triple::ObjectFormatType ObjectFormat;
389 IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
390 IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
391 PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
392 ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0);
393 IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
394 PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
395 IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
396 IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0);
398 // Indirect function call index assignment counter for WebAssembly
399 uint64_t IndirectIndex = 1;
401 // Mapping from type identifiers to the call sites that test them, as well as
402 // whether the type identifier needs to be exported to ThinLTO backends as
403 // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
404 struct TypeIdUserInfo {
405 std::vector<CallInst *> CallSites;
406 bool IsExported = false;
408 DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;
410 /// This structure describes how to lower type tests for a particular type
411 /// identifier. It is either built directly from the global analysis (during
412 /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
413 /// identifier summaries and external symbol references (in ThinLTO backends).
414 struct TypeIdLowering {
415 TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;
417 /// All except Unsat: the start address within the combined global.
418 Constant *OffsetedGlobal;
420 /// ByteArray, Inline, AllOnes: log2 of the required global alignment
421 /// relative to the start address.
422 Constant *AlignLog2;
424 /// ByteArray, Inline, AllOnes: one less than the size of the memory region
425 /// covering members of this type identifier as a multiple of 2^AlignLog2.
426 Constant *SizeM1;
428 /// ByteArray: the byte array to test the address against.
429 Constant *TheByteArray;
431 /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
432 Constant *BitMask;
434 /// Inline: the bit mask to test the address against.
435 Constant *InlineBits;
438 std::vector<ByteArrayInfo> ByteArrayInfos;
440 Function *WeakInitializerFn = nullptr;
442 bool shouldExportConstantsAsAbsoluteSymbols();
443 uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
444 TypeIdLowering importTypeId(StringRef TypeId);
445 void importTypeTest(CallInst *CI);
446 void importFunction(Function *F, bool isJumpTableCanonical,
447 std::vector<GlobalAlias *> &AliasesToErase);
449 BitSetInfo
450 buildBitSet(Metadata *TypeId,
451 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
452 ByteArrayInfo *createByteArray(BitSetInfo &BSI);
453 void allocateByteArrays();
454 Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
455 Value *BitOffset);
456 void lowerTypeTestCalls(
457 ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
458 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
459 Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
460 const TypeIdLowering &TIL);
462 void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
463 ArrayRef<GlobalTypeMember *> Globals);
464 unsigned getJumpTableEntrySize();
465 Type *getJumpTableEntryType();
466 void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
467 Triple::ArchType JumpTableArch,
468 SmallVectorImpl<Value *> &AsmArgs, Function *Dest);
469 void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
470 void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
471 ArrayRef<GlobalTypeMember *> Functions);
472 void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
473 ArrayRef<GlobalTypeMember *> Functions);
474 void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
475 ArrayRef<GlobalTypeMember *> Functions);
476 void
477 buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
478 ArrayRef<GlobalTypeMember *> Globals,
479 ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);
481 void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
482 bool IsJumpTableCanonical);
483 void moveInitializerToModuleConstructor(GlobalVariable *GV);
484 void findGlobalVariableUsersOf(Constant *C,
485 SmallSetVector<GlobalVariable *, 8> &Out);
487 void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
489 /// replaceCfiUses - Go through the uses list for this definition
490 /// and make each use point to "V" instead of "this" when the use is outside
491 /// the block. 'This's use list is expected to have at least one element.
492 /// Unlike replaceAllUsesWith this function skips blockaddr and direct call
493 /// uses.
494 void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);
496 /// replaceDirectCalls - Go through the uses list for this definition and
497 /// replace each use, which is a direct function call.
498 void replaceDirectCalls(Value *Old, Value *New);
500 public:
501 LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
502 const ModuleSummaryIndex *ImportSummary);
504 bool lower();
506 // Lower the module using the action and summary passed as command line
507 // arguments. For testing purposes only.
508 static bool runForTesting(Module &M);
511 struct LowerTypeTests : public ModulePass {
512 static char ID;
514 bool UseCommandLine = false;
516 ModuleSummaryIndex *ExportSummary;
517 const ModuleSummaryIndex *ImportSummary;
519 LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
520 initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
523 LowerTypeTests(ModuleSummaryIndex *ExportSummary,
524 const ModuleSummaryIndex *ImportSummary)
525 : ModulePass(ID), ExportSummary(ExportSummary),
526 ImportSummary(ImportSummary) {
527 initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
530 bool runOnModule(Module &M) override {
531 if (UseCommandLine)
532 return LowerTypeTestsModule::runForTesting(M);
533 return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
537 } // end anonymous namespace
// Definition of the static pass identifier declared in LowerTypeTests.
539 char LowerTypeTests::ID = 0;
// Registers the pass under the name "lowertypetests" with the description
// "Lower type metadata".
541 INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
542 false)
544 ModulePass *
545 llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
546 const ModuleSummaryIndex *ImportSummary) {
547 return new LowerTypeTests(ExportSummary, ImportSummary);
550 /// Build a bit set for TypeId using the object layouts in
551 /// GlobalLayout.
552 BitSetInfo LowerTypeTestsModule::buildBitSet(
553 Metadata *TypeId,
554 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
555 BitSetBuilder BSB;
557 // Compute the byte offset of each address associated with this type
558 // identifier.
559 for (auto &GlobalAndOffset : GlobalLayout) {
560 for (MDNode *Type : GlobalAndOffset.first->types()) {
561 if (Type->getOperand(1) != TypeId)
562 continue;
563 uint64_t Offset =
564 cast<ConstantInt>(
565 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
566 ->getZExtValue();
567 BSB.addOffset(GlobalAndOffset.second + Offset);
571 return BSB.build();
574 /// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
575 /// Bits. This pattern matches to the bt instruction on x86.
576 static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
577 Value *BitOffset) {
578 auto BitsType = cast<IntegerType>(Bits->getType());
579 unsigned BitWidth = BitsType->getBitWidth();
581 BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
582 Value *BitIndex =
583 B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
584 Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
585 Value *MaskedBits = B.CreateAnd(Bits, BitMask);
586 return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
589 ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
590 // Create globals to stand in for byte arrays and masks. These never actually
591 // get initialized, we RAUW and erase them later in allocateByteArrays() once
592 // we know the offset and mask to use.
593 auto ByteArrayGlobal = new GlobalVariable(
594 M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
595 auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true,
596 GlobalValue::PrivateLinkage, nullptr);
598 ByteArrayInfos.emplace_back();
599 ByteArrayInfo *BAI = &ByteArrayInfos.back();
601 BAI->Bits = BSI.Bits;
602 BAI->BitSize = BSI.BitSize;
603 BAI->ByteArray = ByteArrayGlobal;
604 BAI->MaskGlobal = MaskGlobal;
605 return BAI;
608 void LowerTypeTestsModule::allocateByteArrays() {
609 llvm::stable_sort(ByteArrayInfos,
610 [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
611 return BAI1.BitSize > BAI2.BitSize;
614 std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
616 ByteArrayBuilder BAB;
617 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
618 ByteArrayInfo *BAI = &ByteArrayInfos[I];
620 uint8_t Mask;
621 BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
623 BAI->MaskGlobal->replaceAllUsesWith(
624 ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
625 BAI->MaskGlobal->eraseFromParent();
626 if (BAI->MaskPtr)
627 *BAI->MaskPtr = Mask;
630 Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
631 auto ByteArray =
632 new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true,
633 GlobalValue::PrivateLinkage, ByteArrayConst);
635 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
636 ByteArrayInfo *BAI = &ByteArrayInfos[I];
638 Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
639 ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
640 Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
641 ByteArrayConst->getType(), ByteArray, Idxs);
643 // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
644 // that the pc-relative displacement is folded into the lea instead of the
645 // test instruction getting another displacement.
646 GlobalAlias *Alias = GlobalAlias::create(
647 Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
648 BAI->ByteArray->replaceAllUsesWith(Alias);
649 BAI->ByteArray->eraseFromParent();
652 ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
653 BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
654 BAB.BitAllocs[6] + BAB.BitAllocs[7];
655 ByteArraySizeBytes = BAB.Bytes.size();
658 /// Build a test that bit BitOffset is set in the type identifier that was
659 /// lowered to TIL, which must be either an Inline or a ByteArray.
660 Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
661 const TypeIdLowering &TIL,
662 Value *BitOffset) {
663 if (TIL.TheKind == TypeTestResolution::Inline) {
664 // If the bit set is sufficiently small, we can avoid a load by bit testing
665 // a constant.
666 return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
667 } else {
668 Constant *ByteArray = TIL.TheByteArray;
669 if (AvoidReuse && !ImportSummary) {
670 // Each use of the byte array uses a different alias. This makes the
671 // backend less likely to reuse previously computed byte array addresses,
672 // improving the security of the CFI mechanism based on this pass.
673 // This won't work when importing because TheByteArray is external.
674 ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
675 "bits_use", ByteArray, &M);
678 Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
679 Value *Byte = B.CreateLoad(Int8Ty, ByteAddr);
681 Value *ByteAndMask =
682 B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
683 return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
687 static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
688 Value *V, uint64_t COffset) {
689 if (auto GV = dyn_cast<GlobalObject>(V)) {
690 SmallVector<MDNode *, 2> Types;
691 GV->getMetadata(LLVMContext::MD_type, Types);
692 for (MDNode *Type : Types) {
693 if (Type->getOperand(1) != TypeId)
694 continue;
695 uint64_t Offset =
696 cast<ConstantInt>(
697 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
698 ->getZExtValue();
699 if (COffset == Offset)
700 return true;
702 return false;
705 if (auto GEP = dyn_cast<GEPOperator>(V)) {
706 APInt APOffset(DL.getPointerSizeInBits(0), 0);
707 bool Result = GEP->accumulateConstantOffset(DL, APOffset);
708 if (!Result)
709 return false;
710 COffset += APOffset.getZExtValue();
711 return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
714 if (auto Op = dyn_cast<Operator>(V)) {
715 if (Op->getOpcode() == Instruction::BitCast)
716 return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
718 if (Op->getOpcode() == Instruction::Select)
719 return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
720 isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
723 return false;
726 /// Lower a llvm.type.test call to its implementation. Returns the value to
727 /// replace the call with.
728 Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
729 const TypeIdLowering &TIL) {
730 if (TIL.TheKind == TypeTestResolution::Unsat)
731 return ConstantInt::getFalse(M.getContext());
733 Value *Ptr = CI->getArgOperand(0);
734 const DataLayout &DL = M.getDataLayout();
735 if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
736 return ConstantInt::getTrue(M.getContext());
738 BasicBlock *InitialBB = CI->getParent();
740 IRBuilder<> B(CI);
742 Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
744 Constant *OffsetedGlobalAsInt =
745 ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
746 if (TIL.TheKind == TypeTestResolution::Single)
747 return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
749 Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
751 // We need to check that the offset both falls within our range and is
752 // suitably aligned. We can check both properties at the same time by
753 // performing a right rotate by log2(alignment) followed by an integer
754 // comparison against the bitset size. The rotate will move the lower
755 // order bits that need to be zero into the higher order bits of the
756 // result, causing the comparison to fail if they are nonzero. The rotate
757 // also conveniently gives us a bit offset to use during the load from
758 // the bitset.
759 Value *OffsetSHR =
760 B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
761 Value *OffsetSHL = B.CreateShl(
762 PtrOffset, ConstantExpr::getZExt(
763 ConstantExpr::getSub(
764 ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
765 TIL.AlignLog2),
766 IntPtrTy));
767 Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
769 Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1);
771 // If the bit set is all ones, testing against it is unnecessary.
772 if (TIL.TheKind == TypeTestResolution::AllOnes)
773 return OffsetInRange;
775 // See if the intrinsic is used in the following common pattern:
776 // br(llvm.type.test(...), thenbb, elsebb)
777 // where nothing happens between the type test and the br.
778 // If so, create slightly simpler IR.
779 if (CI->hasOneUse())
780 if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin()))
781 if (CI->getNextNode() == Br) {
782 BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator());
783 BasicBlock *Else = Br->getSuccessor(1);
784 BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange);
785 NewBr->setMetadata(LLVMContext::MD_prof,
786 Br->getMetadata(LLVMContext::MD_prof));
787 ReplaceInstWithInst(InitialBB->getTerminator(), NewBr);
789 // Update phis in Else resulting from InitialBB being split
790 for (auto &Phi : Else->phis())
791 Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB);
793 IRBuilder<> ThenB(CI);
794 return createBitSetTest(ThenB, TIL, BitOffset);
797 IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
799 // Now that we know that the offset is in range and aligned, load the
800 // appropriate bit from the bitset.
801 Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
803 // The value we want is 0 if we came directly from the initial block
804 // (having failed the range or alignment checks), or the loaded bit if
805 // we came from the block in which we loaded it.
806 B.SetInsertPoint(CI);
807 PHINode *P = B.CreatePHI(Int1Ty, 2);
808 P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
809 P->addIncoming(Bit, ThenB.GetInsertBlock());
810 return P;
813 /// Given a disjoint set of type identifiers and globals, lay out the globals,
814 /// build the bit sets and lower the llvm.type.test calls.
815 void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
816 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
817 // Build a new global with the combined contents of the referenced globals.
818 // This global is a struct whose even-indexed elements contain the original
819 // contents of the referenced globals and whose odd-indexed elements contain
820 // any padding required to align the next element to the next power of 2 plus
821 // any additional padding required to meet its alignment requirements.
822 std::vector<Constant *> GlobalInits;
823 const DataLayout &DL = M.getDataLayout();
824 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
825 Align MaxAlign;
826 uint64_t CurOffset = 0;
827 uint64_t DesiredPadding = 0;
828 for (GlobalTypeMember *G : Globals) {
829 auto *GV = cast<GlobalVariable>(G->getGlobal());
830 MaybeAlign Alignment(GV->getAlignment());
831 if (!Alignment)
832 Alignment = Align(DL.getABITypeAlignment(GV->getValueType()));
833 MaxAlign = std::max(MaxAlign, *Alignment);
834 uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment);
835 GlobalLayout[G] = GVOffset;
836 if (GVOffset != 0) {
837 uint64_t Padding = GVOffset - CurOffset;
838 GlobalInits.push_back(
839 ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
842 GlobalInits.push_back(GV->getInitializer());
843 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
844 CurOffset = GVOffset + InitSize;
846 // Compute the amount of padding that we'd like for the next element.
847 DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize;
849 // Experiments of different caps with Chromium on both x64 and ARM64
850 // have shown that the 32-byte cap generates the smallest binary on
851 // both platforms while different caps yield similar performance.
852 // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html)
853 if (DesiredPadding > 32)
854 DesiredPadding = alignTo(InitSize, 32) - InitSize;
857 Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
858 auto *CombinedGlobal =
859 new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
860 GlobalValue::PrivateLinkage, NewInit);
861 CombinedGlobal->setAlignment(MaxAlign);
863 StructType *NewTy = cast<StructType>(NewInit->getType());
864 lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout);
866 // Build aliases pointing to offsets into the combined global for each
867 // global from which we built the combined global, and replace references
868 // to the original globals with references to the aliases.
869 for (unsigned I = 0; I != Globals.size(); ++I) {
870 GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal());
872 // Multiply by 2 to account for padding elements.
873 Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
874 ConstantInt::get(Int32Ty, I * 2)};
875 Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
876 NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
877 assert(GV->getType()->getAddressSpace() == 0);
878 GlobalAlias *GAlias =
879 GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(),
880 "", CombinedGlobalElemPtr, &M);
881 GAlias->setVisibility(GV->getVisibility());
882 GAlias->takeName(GV);
883 GV->replaceAllUsesWith(GAlias);
884 GV->eraseFromParent();
888 bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() {
889 return (Arch == Triple::x86 || Arch == Triple::x86_64) &&
890 ObjectFormat == Triple::ELF;
893 /// Export the given type identifier so that ThinLTO backends may import it.
894 /// Type identifiers are exported by adding coarse-grained information about how
895 /// to test the type identifier to the summary, and creating symbols in the
896 /// object file (aliases and absolute symbols) containing fine-grained
897 /// information about the type identifier.
899 /// Returns a pointer to the location in which to store the bitmask, if
900 /// applicable.
901 uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId,
902 const TypeIdLowering &TIL) {
903 TypeTestResolution &TTRes =
904 ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
905 TTRes.TheKind = TIL.TheKind;
907 auto ExportGlobal = [&](StringRef Name, Constant *C) {
908 GlobalAlias *GA =
909 GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
910 "__typeid_" + TypeId + "_" + Name, C, &M);
911 GA->setVisibility(GlobalValue::HiddenVisibility);
914 auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) {
915 if (shouldExportConstantsAsAbsoluteSymbols())
916 ExportGlobal(Name, ConstantExpr::getIntToPtr(C, Int8PtrTy));
917 else
918 Storage = cast<ConstantInt>(C)->getZExtValue();
921 if (TIL.TheKind != TypeTestResolution::Unsat)
922 ExportGlobal("global_addr", TIL.OffsetedGlobal);
924 if (TIL.TheKind == TypeTestResolution::ByteArray ||
925 TIL.TheKind == TypeTestResolution::Inline ||
926 TIL.TheKind == TypeTestResolution::AllOnes) {
927 ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2);
928 ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1);
930 uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
931 if (TIL.TheKind == TypeTestResolution::Inline)
932 TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
933 else
934 TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
937 if (TIL.TheKind == TypeTestResolution::ByteArray) {
938 ExportGlobal("byte_array", TIL.TheByteArray);
939 if (shouldExportConstantsAsAbsoluteSymbols())
940 ExportGlobal("bit_mask", TIL.BitMask);
941 else
942 return &TTRes.BitMask;
945 if (TIL.TheKind == TypeTestResolution::Inline)
946 ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits);
948 return nullptr;
951 LowerTypeTestsModule::TypeIdLowering
952 LowerTypeTestsModule::importTypeId(StringRef TypeId) {
953 const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
954 if (!TidSummary)
955 return {}; // Unsat: no globals match this type id.
956 const TypeTestResolution &TTRes = TidSummary->TTRes;
958 TypeIdLowering TIL;
959 TIL.TheKind = TTRes.TheKind;
961 auto ImportGlobal = [&](StringRef Name) {
962 // Give the global a type of length 0 so that it is not assumed not to alias
963 // with any other global.
964 Constant *C = M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(),
965 Int8Arr0Ty);
966 if (auto *GV = dyn_cast<GlobalVariable>(C))
967 GV->setVisibility(GlobalValue::HiddenVisibility);
968 C = ConstantExpr::getBitCast(C, Int8PtrTy);
969 return C;
972 auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth,
973 Type *Ty) {
974 if (!shouldExportConstantsAsAbsoluteSymbols()) {
975 Constant *C =
976 ConstantInt::get(isa<IntegerType>(Ty) ? Ty : Int64Ty, Const);
977 if (!isa<IntegerType>(Ty))
978 C = ConstantExpr::getIntToPtr(C, Ty);
979 return C;
982 Constant *C = ImportGlobal(Name);
983 auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
984 if (isa<IntegerType>(Ty))
985 C = ConstantExpr::getPtrToInt(C, Ty);
986 if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
987 return C;
989 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
990 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
991 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
992 GV->setMetadata(LLVMContext::MD_absolute_symbol,
993 MDNode::get(M.getContext(), {MinC, MaxC}));
995 if (AbsWidth == IntPtrTy->getBitWidth())
996 SetAbsRange(~0ull, ~0ull); // Full set.
997 else
998 SetAbsRange(0, 1ull << AbsWidth);
999 return C;
1002 if (TIL.TheKind != TypeTestResolution::Unsat)
1003 TIL.OffsetedGlobal = ImportGlobal("global_addr");
1005 if (TIL.TheKind == TypeTestResolution::ByteArray ||
1006 TIL.TheKind == TypeTestResolution::Inline ||
1007 TIL.TheKind == TypeTestResolution::AllOnes) {
1008 TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, Int8Ty);
1009 TIL.SizeM1 =
1010 ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy);
1013 if (TIL.TheKind == TypeTestResolution::ByteArray) {
1014 TIL.TheByteArray = ImportGlobal("byte_array");
1015 TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, Int8PtrTy);
1018 if (TIL.TheKind == TypeTestResolution::Inline)
1019 TIL.InlineBits = ImportConstant(
1020 "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth,
1021 TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
1023 return TIL;
1026 void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
1027 auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
1028 if (!TypeIdMDVal)
1029 report_fatal_error("Second argument of llvm.type.test must be metadata");
1031 auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
1032 if (!TypeIdStr)
1033 report_fatal_error(
1034 "Second argument of llvm.type.test must be a metadata string");
1036 TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
1037 Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
1038 CI->replaceAllUsesWith(Lowered);
1039 CI->eraseFromParent();
1042 // ThinLTO backend: the function F has a jump table entry; update this module
1043 // accordingly. isJumpTableCanonical describes the type of the jump table entry.
1044 void LowerTypeTestsModule::importFunction(
1045 Function *F, bool isJumpTableCanonical,
1046 std::vector<GlobalAlias *> &AliasesToErase) {
1047 assert(F->getType()->getAddressSpace() == 0);
1049 GlobalValue::VisibilityTypes Visibility = F->getVisibility();
1050 std::string Name = F->getName();
1052 if (F->isDeclarationForLinker() && isJumpTableCanonical) {
1053 // Non-dso_local functions may be overriden at run time,
1054 // don't short curcuit them
1055 if (F->isDSOLocal()) {
1056 Function *RealF = Function::Create(F->getFunctionType(),
1057 GlobalValue::ExternalLinkage,
1058 F->getAddressSpace(),
1059 Name + ".cfi", &M);
1060 RealF->setVisibility(GlobalVariable::HiddenVisibility);
1061 replaceDirectCalls(F, RealF);
1063 return;
1066 Function *FDecl;
1067 if (!isJumpTableCanonical) {
1068 // Either a declaration of an external function or a reference to a locally
1069 // defined jump table.
1070 FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
1071 F->getAddressSpace(), Name + ".cfi_jt", &M);
1072 FDecl->setVisibility(GlobalValue::HiddenVisibility);
1073 } else {
1074 F->setName(Name + ".cfi");
1075 F->setLinkage(GlobalValue::ExternalLinkage);
1076 FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
1077 F->getAddressSpace(), Name, &M);
1078 FDecl->setVisibility(Visibility);
1079 Visibility = GlobalValue::HiddenVisibility;
1081 // Delete aliases pointing to this function, they'll be re-created in the
1082 // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed
1083 // will want to reset the aliasees first.
1084 for (auto &U : F->uses()) {
1085 if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
1086 Function *AliasDecl = Function::Create(
1087 F->getFunctionType(), GlobalValue::ExternalLinkage,
1088 F->getAddressSpace(), "", &M);
1089 AliasDecl->takeName(A);
1090 A->replaceAllUsesWith(AliasDecl);
1091 AliasesToErase.push_back(A);
1096 if (F->hasExternalWeakLinkage())
1097 replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical);
1098 else
1099 replaceCfiUses(F, FDecl, isJumpTableCanonical);
1101 // Set visibility late because it's used in replaceCfiUses() to determine
1102 // whether uses need to to be replaced.
1103 F->setVisibility(Visibility);
1106 void LowerTypeTestsModule::lowerTypeTestCalls(
1107 ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
1108 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
1109 CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
1111 // For each type identifier in this disjoint set...
1112 for (Metadata *TypeId : TypeIds) {
1113 // Build the bitset.
1114 BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout);
1115 LLVM_DEBUG({
1116 if (auto MDS = dyn_cast<MDString>(TypeId))
1117 dbgs() << MDS->getString() << ": ";
1118 else
1119 dbgs() << "<unnamed>: ";
1120 BSI.print(dbgs());
1123 ByteArrayInfo *BAI = nullptr;
1124 TypeIdLowering TIL;
1125 TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
1126 Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
1127 TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
1128 TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
1129 if (BSI.isAllOnes()) {
1130 TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
1131 : TypeTestResolution::AllOnes;
1132 } else if (BSI.BitSize <= 64) {
1133 TIL.TheKind = TypeTestResolution::Inline;
1134 uint64_t InlineBits = 0;
1135 for (auto Bit : BSI.Bits)
1136 InlineBits |= uint64_t(1) << Bit;
1137 if (InlineBits == 0)
1138 TIL.TheKind = TypeTestResolution::Unsat;
1139 else
1140 TIL.InlineBits = ConstantInt::get(
1141 (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
1142 } else {
1143 TIL.TheKind = TypeTestResolution::ByteArray;
1144 ++NumByteArraysCreated;
1145 BAI = createByteArray(BSI);
1146 TIL.TheByteArray = BAI->ByteArray;
1147 TIL.BitMask = BAI->MaskGlobal;
1150 TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
1152 if (TIUI.IsExported) {
1153 uint8_t *MaskPtr = exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
1154 if (BAI)
1155 BAI->MaskPtr = MaskPtr;
1158 // Lower each call to llvm.type.test for this type identifier.
1159 for (CallInst *CI : TIUI.CallSites) {
1160 ++NumTypeTestCallsLowered;
1161 Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
1162 CI->replaceAllUsesWith(Lowered);
1163 CI->eraseFromParent();
1168 void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
1169 if (Type->getNumOperands() != 2)
1170 report_fatal_error("All operands of type metadata must have 2 elements");
1172 if (GO->isThreadLocal())
1173 report_fatal_error("Bit set element may not be thread-local");
1174 if (isa<GlobalVariable>(GO) && GO->hasSection())
1175 report_fatal_error(
1176 "A member of a type identifier may not have an explicit section");
1178 // FIXME: We previously checked that global var member of a type identifier
1179 // must be a definition, but the IR linker may leave type metadata on
1180 // declarations. We should restore this check after fixing PR31759.
1182 auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0));
1183 if (!OffsetConstMD)
1184 report_fatal_error("Type offset must be a constant");
1185 auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
1186 if (!OffsetInt)
1187 report_fatal_error("Type offset must be an integer constant");
/// Size in bytes of one jump table entry on x86 and x86-64.
static constexpr unsigned kX86JumpTableEntrySize = 8;
/// Size in bytes of one jump table entry on ARM, Thumb and AArch64.
static constexpr unsigned kARMJumpTableEntrySize = 4;
1193 unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
1194 switch (Arch) {
1195 case Triple::x86:
1196 case Triple::x86_64:
1197 return kX86JumpTableEntrySize;
1198 case Triple::arm:
1199 case Triple::thumb:
1200 case Triple::aarch64:
1201 return kARMJumpTableEntrySize;
1202 default:
1203 report_fatal_error("Unsupported architecture for jump tables");
1207 // Create a jump table entry for the target. This consists of an instruction
1208 // sequence containing a relative branch to Dest. Appends inline asm text,
1209 // constraints and arguments to AsmOS, ConstraintOS and AsmArgs.
1210 void LowerTypeTestsModule::createJumpTableEntry(
1211 raw_ostream &AsmOS, raw_ostream &ConstraintOS,
1212 Triple::ArchType JumpTableArch, SmallVectorImpl<Value *> &AsmArgs,
1213 Function *Dest) {
1214 unsigned ArgIndex = AsmArgs.size();
1216 if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) {
1217 AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
1218 AsmOS << "int3\nint3\nint3\n";
1219 } else if (JumpTableArch == Triple::arm || JumpTableArch == Triple::aarch64) {
1220 AsmOS << "b $" << ArgIndex << "\n";
1221 } else if (JumpTableArch == Triple::thumb) {
1222 AsmOS << "b.w $" << ArgIndex << "\n";
1223 } else {
1224 report_fatal_error("Unsupported architecture for jump tables");
1227 ConstraintOS << (ArgIndex > 0 ? ",s" : "s");
1228 AsmArgs.push_back(Dest);
1231 Type *LowerTypeTestsModule::getJumpTableEntryType() {
1232 return ArrayType::get(Int8Ty, getJumpTableEntrySize());
1235 /// Given a disjoint set of type identifiers and functions, build the bit sets
1236 /// and lower the llvm.type.test calls, architecture dependently.
1237 void LowerTypeTestsModule::buildBitSetsFromFunctions(
1238 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1239 if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
1240 Arch == Triple::thumb || Arch == Triple::aarch64)
1241 buildBitSetsFromFunctionsNative(TypeIds, Functions);
1242 else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
1243 buildBitSetsFromFunctionsWASM(TypeIds, Functions);
1244 else
1245 report_fatal_error("Unsupported architecture for jump tables");
1248 void LowerTypeTestsModule::moveInitializerToModuleConstructor(
1249 GlobalVariable *GV) {
1250 if (WeakInitializerFn == nullptr) {
1251 WeakInitializerFn = Function::Create(
1252 FunctionType::get(Type::getVoidTy(M.getContext()),
1253 /* IsVarArg */ false),
1254 GlobalValue::InternalLinkage,
1255 M.getDataLayout().getProgramAddressSpace(),
1256 "__cfi_global_var_init", &M);
1257 BasicBlock *BB =
1258 BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn);
1259 ReturnInst::Create(M.getContext(), BB);
1260 WeakInitializerFn->setSection(
1261 ObjectFormat == Triple::MachO
1262 ? "__TEXT,__StaticInit,regular,pure_instructions"
1263 : ".text.startup");
1264 // This code is equivalent to relocation application, and should run at the
1265 // earliest possible time (i.e. with the highest priority).
1266 appendToGlobalCtors(M, WeakInitializerFn, /* Priority */ 0);
1269 IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator());
1270 GV->setConstant(false);
1271 IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlignment());
1272 GV->setInitializer(Constant::getNullValue(GV->getValueType()));
1275 void LowerTypeTestsModule::findGlobalVariableUsersOf(
1276 Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) {
1277 for (auto *U : C->users()){
1278 if (auto *GV = dyn_cast<GlobalVariable>(U))
1279 Out.insert(GV);
1280 else if (auto *C2 = dyn_cast<Constant>(U))
1281 findGlobalVariableUsersOf(C2, Out);
1285 // Replace all uses of F with (F ? JT : 0).
1286 void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
1287 Function *F, Constant *JT, bool IsJumpTableCanonical) {
1288 // The target expression can not appear in a constant initializer on most
1289 // (all?) targets. Switch to a runtime initializer.
1290 SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
1291 findGlobalVariableUsersOf(F, GlobalVarUsers);
1292 for (auto GV : GlobalVarUsers)
1293 moveInitializerToModuleConstructor(GV);
1295 // Can not RAUW F with an expression that uses F. Replace with a temporary
1296 // placeholder first.
1297 Function *PlaceholderFn =
1298 Function::Create(cast<FunctionType>(F->getValueType()),
1299 GlobalValue::ExternalWeakLinkage,
1300 F->getAddressSpace(), "", &M);
1301 replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical);
1303 Constant *Target = ConstantExpr::getSelect(
1304 ConstantExpr::getICmp(CmpInst::ICMP_NE, F,
1305 Constant::getNullValue(F->getType())),
1306 JT, Constant::getNullValue(F->getType()));
1307 PlaceholderFn->replaceAllUsesWith(Target);
1308 PlaceholderFn->eraseFromParent();
1311 static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
1312 Attribute TFAttr = F->getFnAttribute("target-features");
1313 if (!TFAttr.hasAttribute(Attribute::None)) {
1314 SmallVector<StringRef, 6> Features;
1315 TFAttr.getValueAsString().split(Features, ',');
1316 for (StringRef Feature : Features) {
1317 if (Feature == "-thumb-mode")
1318 return false;
1319 else if (Feature == "+thumb-mode")
1320 return true;
1324 return ModuleArch == Triple::thumb;
1327 // Each jump table must be either ARM or Thumb as a whole for the bit-test math
1328 // to work. Pick one that matches the majority of members to minimize interop
1329 // veneers inserted by the linker.
1330 static Triple::ArchType
1331 selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
1332 Triple::ArchType ModuleArch) {
1333 if (ModuleArch != Triple::arm && ModuleArch != Triple::thumb)
1334 return ModuleArch;
1336 unsigned ArmCount = 0, ThumbCount = 0;
1337 for (const auto GTM : Functions) {
1338 if (!GTM->isJumpTableCanonical()) {
1339 // PLT stubs are always ARM.
1340 // FIXME: This is the wrong heuristic for non-canonical jump tables.
1341 ++ArmCount;
1342 continue;
1345 Function *F = cast<Function>(GTM->getGlobal());
1346 ++(isThumbFunction(F, ModuleArch) ? ThumbCount : ArmCount);
1349 return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
1352 void LowerTypeTestsModule::createJumpTable(
1353 Function *F, ArrayRef<GlobalTypeMember *> Functions) {
1354 std::string AsmStr, ConstraintStr;
1355 raw_string_ostream AsmOS(AsmStr), ConstraintOS(ConstraintStr);
1356 SmallVector<Value *, 16> AsmArgs;
1357 AsmArgs.reserve(Functions.size() * 2);
1359 Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions, Arch);
1361 for (unsigned I = 0; I != Functions.size(); ++I)
1362 createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
1363 cast<Function>(Functions[I]->getGlobal()));
1365 // Align the whole table by entry size.
1366 F->setAlignment(Align(getJumpTableEntrySize()));
1367 // Skip prologue.
1368 // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3.
1369 // Luckily, this function does not get any prologue even without the
1370 // attribute.
1371 if (OS != Triple::Win32)
1372 F->addFnAttr(Attribute::Naked);
1373 if (JumpTableArch == Triple::arm)
1374 F->addFnAttr("target-features", "-thumb-mode");
1375 if (JumpTableArch == Triple::thumb) {
1376 F->addFnAttr("target-features", "+thumb-mode");
1377 // Thumb jump table assembly needs Thumb2. The following attribute is added
1378 // by Clang for -march=armv7.
1379 F->addFnAttr("target-cpu", "cortex-a8");
1381 // Make sure we don't emit .eh_frame for this function.
1382 F->addFnAttr(Attribute::NoUnwind);
1384 BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
1385 IRBuilder<> IRB(BB);
1387 SmallVector<Type *, 16> ArgTypes;
1388 ArgTypes.reserve(AsmArgs.size());
1389 for (const auto &Arg : AsmArgs)
1390 ArgTypes.push_back(Arg->getType());
1391 InlineAsm *JumpTableAsm =
1392 InlineAsm::get(FunctionType::get(IRB.getVoidTy(), ArgTypes, false),
1393 AsmOS.str(), ConstraintOS.str(),
1394 /*hasSideEffects=*/true);
1396 IRB.CreateCall(JumpTableAsm, AsmArgs);
1397 IRB.CreateUnreachable();
1400 /// Given a disjoint set of type identifiers and functions, build a jump table
1401 /// for the functions, build the bit sets and lower the llvm.type.test calls.
1402 void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
1403 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1404 // Unlike the global bitset builder, the function bitset builder cannot
1405 // re-arrange functions in a particular order and base its calculations on the
1406 // layout of the functions' entry points, as we have no idea how large a
1407 // particular function will end up being (the size could even depend on what
1408 // this pass does!) Instead, we build a jump table, which is a block of code
1409 // consisting of one branch instruction for each of the functions in the bit
1410 // set that branches to the target function, and redirect any taken function
1411 // addresses to the corresponding jump table entry. In the object file's
1412 // symbol table, the symbols for the target functions also refer to the jump
1413 // table entries, so that addresses taken outside the module will pass any
1414 // verification done inside the module.
1416 // In more concrete terms, suppose we have three functions f, g, h which are
1417 // of the same type, and a function foo that returns their addresses:
1419 // f:
1420 // mov 0, %eax
1421 // ret
1423 // g:
1424 // mov 1, %eax
1425 // ret
1427 // h:
1428 // mov 2, %eax
1429 // ret
1431 // foo:
1432 // mov f, %eax
1433 // mov g, %edx
1434 // mov h, %ecx
1435 // ret
1437 // We output the jump table as module-level inline asm string. The end result
1438 // will (conceptually) look like this:
1440 // f = .cfi.jumptable
1441 // g = .cfi.jumptable + 4
1442 // h = .cfi.jumptable + 8
1443 // .cfi.jumptable:
1444 // jmp f.cfi ; 5 bytes
1445 // int3 ; 1 byte
1446 // int3 ; 1 byte
1447 // int3 ; 1 byte
1448 // jmp g.cfi ; 5 bytes
1449 // int3 ; 1 byte
1450 // int3 ; 1 byte
1451 // int3 ; 1 byte
1452 // jmp h.cfi ; 5 bytes
1453 // int3 ; 1 byte
1454 // int3 ; 1 byte
1455 // int3 ; 1 byte
1457 // f.cfi:
1458 // mov 0, %eax
1459 // ret
1461 // g.cfi:
1462 // mov 1, %eax
1463 // ret
1465 // h.cfi:
1466 // mov 2, %eax
1467 // ret
1469 // foo:
1470 // mov f, %eax
1471 // mov g, %edx
1472 // mov h, %ecx
1473 // ret
1475 // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
1476 // normal case the check can be carried out using the same kind of simple
1477 // arithmetic that we normally use for globals.
1479 // FIXME: find a better way to represent the jumptable in the IR.
1480 assert(!Functions.empty());
1482 // Build a simple layout based on the regular layout of jump tables.
1483 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
1484 unsigned EntrySize = getJumpTableEntrySize();
1485 for (unsigned I = 0; I != Functions.size(); ++I)
1486 GlobalLayout[Functions[I]] = I * EntrySize;
1488 Function *JumpTableFn =
1489 Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()),
1490 /* IsVarArg */ false),
1491 GlobalValue::PrivateLinkage,
1492 M.getDataLayout().getProgramAddressSpace(),
1493 ".cfi.jumptable", &M);
1494 ArrayType *JumpTableType =
1495 ArrayType::get(getJumpTableEntryType(), Functions.size());
1496 auto JumpTable =
1497 ConstantExpr::getPointerCast(JumpTableFn, JumpTableType->getPointerTo(0));
1499 lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
1502 ScopedSaveAliaseesAndUsed S(M);
1504 // Build aliases pointing to offsets into the jump table, and replace
1505 // references to the original functions with references to the aliases.
1506 for (unsigned I = 0; I != Functions.size(); ++I) {
1507 Function *F = cast<Function>(Functions[I]->getGlobal());
1508 bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical();
1510 Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
1511 ConstantExpr::getInBoundsGetElementPtr(
1512 JumpTableType, JumpTable,
1513 ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
1514 ConstantInt::get(IntPtrTy, I)}),
1515 F->getType());
1516 if (Functions[I]->isExported()) {
1517 if (IsJumpTableCanonical) {
1518 ExportSummary->cfiFunctionDefs().insert(F->getName());
1519 } else {
1520 GlobalAlias *JtAlias = GlobalAlias::create(
1521 F->getValueType(), 0, GlobalValue::ExternalLinkage,
1522 F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
1523 JtAlias->setVisibility(GlobalValue::HiddenVisibility);
1524 ExportSummary->cfiFunctionDecls().insert(F->getName());
1527 if (!IsJumpTableCanonical) {
1528 if (F->hasExternalWeakLinkage())
1529 replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
1530 IsJumpTableCanonical);
1531 else
1532 replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical);
1533 } else {
1534 assert(F->getType()->getAddressSpace() == 0);
1536 GlobalAlias *FAlias =
1537 GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "",
1538 CombinedGlobalElemPtr, &M);
1539 FAlias->setVisibility(F->getVisibility());
1540 FAlias->takeName(F);
1541 if (FAlias->hasName())
1542 F->setName(FAlias->getName() + ".cfi");
1543 replaceCfiUses(F, FAlias, IsJumpTableCanonical);
1544 if (!F->hasLocalLinkage())
1545 F->setVisibility(GlobalVariable::HiddenVisibility);
1550 createJumpTable(JumpTableFn, Functions);
1553 /// Assign a dummy layout using an incrementing counter, tag each function
1554 /// with its index represented as metadata, and lower each type test to an
1555 /// integer range comparison. During generation of the indirect function call
1556 /// table in the backend, it will assign the given indexes.
1557 /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet
1558 /// been finalized.
1559 void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
1560 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1561 assert(!Functions.empty());
1563 // Build consecutive monotonic integer ranges for each call target set
1564 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
1566 for (GlobalTypeMember *GTM : Functions) {
1567 Function *F = cast<Function>(GTM->getGlobal());
1569 // Skip functions that are not address taken, to avoid bloating the table
1570 if (!F->hasAddressTaken())
1571 continue;
1573 // Store metadata with the index for each function
1574 MDNode *MD = MDNode::get(F->getContext(),
1575 ArrayRef<Metadata *>(ConstantAsMetadata::get(
1576 ConstantInt::get(Int64Ty, IndirectIndex))));
1577 F->setMetadata("wasm.index", MD);
1579 // Assign the counter value
1580 GlobalLayout[GTM] = IndirectIndex++;
1583 // The indirect function table index space starts at zero, so pass a NULL
1584 // pointer as the subtracted "jump table" offset.
1585 lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(Int32PtrTy),
1586 GlobalLayout);
1589 void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
1590 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals,
1591 ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) {
1592 DenseMap<Metadata *, uint64_t> TypeIdIndices;
1593 for (unsigned I = 0; I != TypeIds.size(); ++I)
1594 TypeIdIndices[TypeIds[I]] = I;
1596 // For each type identifier, build a set of indices that refer to members of
1597 // the type identifier.
1598 std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
1599 unsigned GlobalIndex = 0;
1600 DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices;
1601 for (GlobalTypeMember *GTM : Globals) {
1602 for (MDNode *Type : GTM->types()) {
1603 // Type = { offset, type identifier }
1604 auto I = TypeIdIndices.find(Type->getOperand(1));
1605 if (I != TypeIdIndices.end())
1606 TypeMembers[I->second].insert(GlobalIndex);
1608 GlobalIndices[GTM] = GlobalIndex;
1609 GlobalIndex++;
1612 for (ICallBranchFunnel *JT : ICallBranchFunnels) {
1613 TypeMembers.emplace_back();
1614 std::set<uint64_t> &TMSet = TypeMembers.back();
1615 for (GlobalTypeMember *T : JT->targets())
1616 TMSet.insert(GlobalIndices[T]);
1619 // Order the sets of indices by size. The GlobalLayoutBuilder works best
1620 // when given small index sets first.
1621 llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
1622 const std::set<uint64_t> &O2) {
1623 return O1.size() < O2.size();
1626 // Create a GlobalLayoutBuilder and provide it with index sets as layout
1627 // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
1628 // close together as possible.
1629 GlobalLayoutBuilder GLB(Globals.size());
1630 for (auto &&MemSet : TypeMembers)
1631 GLB.addFragment(MemSet);
1633 // Build a vector of globals with the computed layout.
1634 bool IsGlobalSet =
1635 Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal());
1636 std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size());
1637 auto OGTMI = OrderedGTMs.begin();
1638 for (auto &&F : GLB.Fragments) {
1639 for (auto &&Offset : F) {
1640 if (IsGlobalSet != isa<GlobalVariable>(Globals[Offset]->getGlobal()))
1641 report_fatal_error("Type identifier may not contain both global "
1642 "variables and functions");
1643 *OGTMI++ = Globals[Offset];
1647 // Build the bitsets from this disjoint set.
1648 if (IsGlobalSet)
1649 buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs);
1650 else
1651 buildBitSetsFromFunctions(TypeIds, OrderedGTMs);
1654 /// Lower all type tests in this module.
1655 LowerTypeTestsModule::LowerTypeTestsModule(
1656 Module &M, ModuleSummaryIndex *ExportSummary,
1657 const ModuleSummaryIndex *ImportSummary)
1658 : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
1659 assert(!(ExportSummary && ImportSummary));
1660 Triple TargetTriple(M.getTargetTriple());
1661 Arch = TargetTriple.getArch();
1662 OS = TargetTriple.getOS();
1663 ObjectFormat = TargetTriple.getObjectFormat();
1666 bool LowerTypeTestsModule::runForTesting(Module &M) {
1667 ModuleSummaryIndex Summary(/*HaveGVs=*/false);
1669 // Handle the command-line summary arguments. This code is for testing
1670 // purposes only, so we handle errors directly.
1671 if (!ClReadSummary.empty()) {
1672 ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
1673 ": ");
1674 auto ReadSummaryFile =
1675 ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
1677 yaml::Input In(ReadSummaryFile->getBuffer());
1678 In >> Summary;
1679 ExitOnErr(errorCodeToError(In.error()));
1682 bool Changed =
1683 LowerTypeTestsModule(
1684 M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
1685 ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
1686 .lower();
1688 if (!ClWriteSummary.empty()) {
1689 ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
1690 ": ");
1691 std::error_code EC;
1692 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
1693 ExitOnErr(errorCodeToError(EC));
1695 yaml::Output Out(OS);
1696 Out << Summary;
1699 return Changed;
1702 static bool isDirectCall(Use& U) {
1703 auto *Usr = dyn_cast<CallInst>(U.getUser());
1704 if (Usr) {
1705 CallSite CS(Usr);
1706 if (CS.isCallee(&U))
1707 return true;
1709 return false;
1712 void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
1713 bool IsJumpTableCanonical) {
1714 SmallSetVector<Constant *, 4> Constants;
1715 auto UI = Old->use_begin(), E = Old->use_end();
1716 for (; UI != E;) {
1717 Use &U = *UI;
1718 ++UI;
1720 // Skip block addresses
1721 if (isa<BlockAddress>(U.getUser()))
1722 continue;
1724 // Skip direct calls to externally defined or non-dso_local functions
1725 if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
1726 continue;
1728 // Must handle Constants specially, we cannot call replaceUsesOfWith on a
1729 // constant because they are uniqued.
1730 if (auto *C = dyn_cast<Constant>(U.getUser())) {
1731 if (!isa<GlobalValue>(C)) {
1732 // Save unique users to avoid processing operand replacement
1733 // more than once.
1734 Constants.insert(C);
1735 continue;
1739 U.set(New);
1742 // Process operand replacement of saved constants.
1743 for (auto *C : Constants)
1744 C->handleOperandChange(Old, New);
1747 void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
1748 Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); });
1751 bool LowerTypeTestsModule::lower() {
1752 // If only some of the modules were split, we cannot correctly perform
1753 // this transformation. We already checked for the presense of type tests
1754 // with partially split modules during the thin link, and would have emitted
1755 // an error if any were found, so here we can simply return.
1756 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
1757 (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
1758 return false;
1760 Function *TypeTestFunc =
1761 M.getFunction(Intrinsic::getName(Intrinsic::type_test));
1762 Function *ICallBranchFunnelFunc =
1763 M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
1764 if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
1765 (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) &&
1766 !ExportSummary && !ImportSummary)
1767 return false;
1769 if (ImportSummary) {
1770 if (TypeTestFunc) {
1771 for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
1772 UI != UE;) {
1773 auto *CI = cast<CallInst>((*UI++).getUser());
1774 importTypeTest(CI);
1778 if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
1779 report_fatal_error(
1780 "unexpected call to llvm.icall.branch.funnel during import phase");
1782 SmallVector<Function *, 8> Defs;
1783 SmallVector<Function *, 8> Decls;
1784 for (auto &F : M) {
1785 // CFI functions are either external, or promoted. A local function may
1786 // have the same name, but it's not the one we are looking for.
1787 if (F.hasLocalLinkage())
1788 continue;
1789 if (ImportSummary->cfiFunctionDefs().count(F.getName()))
1790 Defs.push_back(&F);
1791 else if (ImportSummary->cfiFunctionDecls().count(F.getName()))
1792 Decls.push_back(&F);
1795 std::vector<GlobalAlias *> AliasesToErase;
1797 ScopedSaveAliaseesAndUsed S(M);
1798 for (auto F : Defs)
1799 importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase);
1800 for (auto F : Decls)
1801 importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase);
1803 for (GlobalAlias *GA : AliasesToErase)
1804 GA->eraseFromParent();
1806 return true;
1809 // Equivalence class set containing type identifiers and the globals that
1810 // reference them. This is used to partition the set of type identifiers in
1811 // the module into disjoint sets.
1812 using GlobalClassesTy = EquivalenceClasses<
1813 PointerUnion3<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
1814 GlobalClassesTy GlobalClasses;
1816 // Verify the type metadata and build a few data structures to let us
1817 // efficiently enumerate the type identifiers associated with a global:
1818 // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
1819 // of associated type metadata) and a mapping from type identifiers to their
1820 // list of GlobalTypeMembers and last observed index in the list of globals.
1821 // The indices will be used later to deterministically order the list of type
1822 // identifiers.
1823 BumpPtrAllocator Alloc;
1824 struct TIInfo {
1825 unsigned UniqueId;
1826 std::vector<GlobalTypeMember *> RefGlobals;
1828 DenseMap<Metadata *, TIInfo> TypeIdInfo;
1829 unsigned CurUniqueId = 0;
1830 SmallVector<MDNode *, 2> Types;
1832 // Cross-DSO CFI emits jumptable entries for exported functions as well as
1833 // address taken functions in case they are address taken in other modules.
1834 const bool CrossDsoCfi = M.getModuleFlag("Cross-DSO CFI") != nullptr;
1836 struct ExportedFunctionInfo {
1837 CfiFunctionLinkage Linkage;
1838 MDNode *FuncMD; // {name, linkage, type[, type...]}
1840 DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
1841 if (ExportSummary) {
1842 // A set of all functions that are address taken by a live global object.
1843 DenseSet<GlobalValue::GUID> AddressTaken;
1844 for (auto &I : *ExportSummary)
1845 for (auto &GVS : I.second.SummaryList)
1846 if (GVS->isLive())
1847 for (auto &Ref : GVS->refs())
1848 AddressTaken.insert(Ref.getGUID());
1850 NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
1851 if (CfiFunctionsMD) {
1852 for (auto FuncMD : CfiFunctionsMD->operands()) {
1853 assert(FuncMD->getNumOperands() >= 2);
1854 StringRef FunctionName =
1855 cast<MDString>(FuncMD->getOperand(0))->getString();
1856 CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
1857 cast<ConstantAsMetadata>(FuncMD->getOperand(1))
1858 ->getValue()
1859 ->getUniqueInteger()
1860 .getZExtValue());
1861 const GlobalValue::GUID GUID = GlobalValue::getGUID(
1862 GlobalValue::dropLLVMManglingEscape(FunctionName));
1863 // Do not emit jumptable entries for functions that are not-live and
1864 // have no live references (and are not exported with cross-DSO CFI.)
1865 if (!ExportSummary->isGUIDLive(GUID))
1866 continue;
1867 if (!AddressTaken.count(GUID)) {
1868 if (!CrossDsoCfi || Linkage != CFL_Definition)
1869 continue;
1871 bool Exported = false;
1872 if (auto VI = ExportSummary->getValueInfo(GUID))
1873 for (auto &GVS : VI.getSummaryList())
1874 if (GVS->isLive() && !GlobalValue::isLocalLinkage(GVS->linkage()))
1875 Exported = true;
1877 if (!Exported)
1878 continue;
1880 auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
1881 if (!P.second && P.first->second.Linkage != CFL_Definition)
1882 P.first->second = {Linkage, FuncMD};
1885 for (const auto &P : ExportedFunctions) {
1886 StringRef FunctionName = P.first;
1887 CfiFunctionLinkage Linkage = P.second.Linkage;
1888 MDNode *FuncMD = P.second.FuncMD;
1889 Function *F = M.getFunction(FunctionName);
1890 if (F && F->hasLocalLinkage()) {
1891 // Locally defined function that happens to have the same name as a
1892 // function defined in a ThinLTO module. Rename it to move it out of
1893 // the way of the external reference that we're about to create.
1894 // Note that setName will find a unique name for the function, so even
1895 // if there is an existing function with the suffix there won't be a
1896 // name collision.
1897 F->setName(F->getName() + ".1");
1898 F = nullptr;
1901 if (!F)
1902 F = Function::Create(
1903 FunctionType::get(Type::getVoidTy(M.getContext()), false),
1904 GlobalVariable::ExternalLinkage,
1905 M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);
1907 // If the function is available_externally, remove its definition so
1908 // that it is handled the same way as a declaration. Later we will try
1909 // to create an alias using this function's linkage, which will fail if
1910 // the linkage is available_externally. This will also result in us
1911 // following the code path below to replace the type metadata.
1912 if (F->hasAvailableExternallyLinkage()) {
1913 F->setLinkage(GlobalValue::ExternalLinkage);
1914 F->deleteBody();
1915 F->setComdat(nullptr);
1916 F->clearMetadata();
1919 // Update the linkage for extern_weak declarations when a definition
1920 // exists.
1921 if (Linkage == CFL_Definition && F->hasExternalWeakLinkage())
1922 F->setLinkage(GlobalValue::ExternalLinkage);
1924 // If the function in the full LTO module is a declaration, replace its
1925 // type metadata with the type metadata we found in cfi.functions. That
1926 // metadata is presumed to be more accurate than the metadata attached
1927 // to the declaration.
1928 if (F->isDeclaration()) {
1929 if (Linkage == CFL_WeakDeclaration)
1930 F->setLinkage(GlobalValue::ExternalWeakLinkage);
1932 F->eraseMetadata(LLVMContext::MD_type);
1933 for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
1934 F->addMetadata(LLVMContext::MD_type,
1935 *cast<MDNode>(FuncMD->getOperand(I).get()));
1941 DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers;
1942 for (GlobalObject &GO : M.global_objects()) {
1943 if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
1944 continue;
1946 Types.clear();
1947 GO.getMetadata(LLVMContext::MD_type, Types);
1949 bool IsJumpTableCanonical = false;
1950 bool IsExported = false;
1951 if (Function *F = dyn_cast<Function>(&GO)) {
1952 IsJumpTableCanonical = isJumpTableCanonical(F);
1953 if (ExportedFunctions.count(F->getName())) {
1954 IsJumpTableCanonical |=
1955 ExportedFunctions[F->getName()].Linkage == CFL_Definition;
1956 IsExported = true;
1957 // TODO: The logic here checks only that the function is address taken,
1958 // not that the address takers are live. This can be updated to check
1959 // their liveness and emit fewer jumptable entries once monolithic LTO
1960 // builds also emit summaries.
1961 } else if (!F->hasAddressTaken()) {
1962 if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
1963 continue;
1967 auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
1968 IsExported, Types);
1969 GlobalTypeMembers[&GO] = GTM;
1970 for (MDNode *Type : Types) {
1971 verifyTypeMDNode(&GO, Type);
1972 auto &Info = TypeIdInfo[Type->getOperand(1)];
1973 Info.UniqueId = ++CurUniqueId;
1974 Info.RefGlobals.push_back(GTM);
1978 auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
1979 // Add the call site to the list of call sites for this type identifier. We
1980 // also use TypeIdUsers to keep track of whether we have seen this type
1981 // identifier before. If we have, we don't need to re-add the referenced
1982 // globals to the equivalence class.
1983 auto Ins = TypeIdUsers.insert({TypeId, {}});
1984 if (Ins.second) {
1985 // Add the type identifier to the equivalence class.
1986 GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
1987 GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
1989 // Add the referenced globals to the type identifier's equivalence class.
1990 for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
1991 CurSet = GlobalClasses.unionSets(
1992 CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
1995 return Ins.first->second;
1998 if (TypeTestFunc) {
1999 for (const Use &U : TypeTestFunc->uses()) {
2000 auto CI = cast<CallInst>(U.getUser());
2002 auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
2003 if (!TypeIdMDVal)
2004 report_fatal_error("Second argument of llvm.type.test must be metadata");
2005 auto TypeId = TypeIdMDVal->getMetadata();
2006 AddTypeIdUse(TypeId).CallSites.push_back(CI);
2010 if (ICallBranchFunnelFunc) {
2011 for (const Use &U : ICallBranchFunnelFunc->uses()) {
2012 if (Arch != Triple::x86_64)
2013 report_fatal_error(
2014 "llvm.icall.branch.funnel not supported on this target");
2016 auto CI = cast<CallInst>(U.getUser());
2018 std::vector<GlobalTypeMember *> Targets;
2019 if (CI->getNumArgOperands() % 2 != 1)
2020 report_fatal_error("number of arguments should be odd");
2022 GlobalClassesTy::member_iterator CurSet;
2023 for (unsigned I = 1; I != CI->getNumArgOperands(); I += 2) {
2024 int64_t Offset;
2025 auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
2026 CI->getOperand(I), Offset, M.getDataLayout()));
2027 if (!Base)
2028 report_fatal_error(
2029 "Expected branch funnel operand to be global value");
2031 GlobalTypeMember *GTM = GlobalTypeMembers[Base];
2032 Targets.push_back(GTM);
2033 GlobalClassesTy::member_iterator NewSet =
2034 GlobalClasses.findLeader(GlobalClasses.insert(GTM));
2035 if (I == 1)
2036 CurSet = NewSet;
2037 else
2038 CurSet = GlobalClasses.unionSets(CurSet, NewSet);
2041 GlobalClasses.unionSets(
2042 CurSet, GlobalClasses.findLeader(
2043 GlobalClasses.insert(ICallBranchFunnel::create(
2044 Alloc, CI, Targets, ++CurUniqueId))));
2048 if (ExportSummary) {
2049 DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2050 for (auto &P : TypeIdInfo) {
2051 if (auto *TypeId = dyn_cast<MDString>(P.first))
2052 MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
2053 TypeId);
2056 for (auto &P : *ExportSummary) {
2057 for (auto &S : P.second.SummaryList) {
2058 if (!ExportSummary->isGlobalValueLive(S.get()))
2059 continue;
2060 if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
2061 for (GlobalValue::GUID G : FS->type_tests())
2062 for (Metadata *MD : MetadataByGUID[G])
2063 AddTypeIdUse(MD).IsExported = true;
2068 if (GlobalClasses.empty())
2069 return false;
2071 // Build a list of disjoint sets ordered by their maximum global index for
2072 // determinism.
2073 std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
2074 for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
2075 E = GlobalClasses.end();
2076 I != E; ++I) {
2077 if (!I->isLeader())
2078 continue;
2079 ++NumTypeIdDisjointSets;
2081 unsigned MaxUniqueId = 0;
2082 for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
2083 MI != GlobalClasses.member_end(); ++MI) {
2084 if (auto *MD = MI->dyn_cast<Metadata *>())
2085 MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId);
2086 else if (auto *BF = MI->dyn_cast<ICallBranchFunnel *>())
2087 MaxUniqueId = std::max(MaxUniqueId, BF->UniqueId);
2089 Sets.emplace_back(I, MaxUniqueId);
2091 llvm::sort(Sets,
2092 [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
2093 const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
2094 return S1.second < S2.second;
2097 // For each disjoint set we found...
2098 for (const auto &S : Sets) {
2099 // Build the list of type identifiers in this disjoint set.
2100 std::vector<Metadata *> TypeIds;
2101 std::vector<GlobalTypeMember *> Globals;
2102 std::vector<ICallBranchFunnel *> ICallBranchFunnels;
2103 for (GlobalClassesTy::member_iterator MI =
2104 GlobalClasses.member_begin(S.first);
2105 MI != GlobalClasses.member_end(); ++MI) {
2106 if (MI->is<Metadata *>())
2107 TypeIds.push_back(MI->get<Metadata *>());
2108 else if (MI->is<GlobalTypeMember *>())
2109 Globals.push_back(MI->get<GlobalTypeMember *>());
2110 else
2111 ICallBranchFunnels.push_back(MI->get<ICallBranchFunnel *>());
2114 // Order type identifiers by unique ID for determinism. This ordering is
2115 // stable as there is a one-to-one mapping between metadata and unique IDs.
2116 llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
2117 return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
2120 // Same for the branch funnels.
2121 llvm::sort(ICallBranchFunnels,
2122 [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
2123 return F1->UniqueId < F2->UniqueId;
2126 // Build bitsets for this disjoint set.
2127 buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
2130 allocateByteArrays();
2132 // Parse alias data to replace stand-in function declarations for aliases
2133 // with an alias to the intended target.
2134 if (ExportSummary) {
2135 if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) {
2136 for (auto AliasMD : AliasesMD->operands()) {
2137 assert(AliasMD->getNumOperands() >= 4);
2138 StringRef AliasName =
2139 cast<MDString>(AliasMD->getOperand(0))->getString();
2140 StringRef Aliasee = cast<MDString>(AliasMD->getOperand(1))->getString();
2142 if (!ExportedFunctions.count(Aliasee) ||
2143 ExportedFunctions[Aliasee].Linkage != CFL_Definition ||
2144 !M.getNamedAlias(Aliasee))
2145 continue;
2147 GlobalValue::VisibilityTypes Visibility =
2148 static_cast<GlobalValue::VisibilityTypes>(
2149 cast<ConstantAsMetadata>(AliasMD->getOperand(2))
2150 ->getValue()
2151 ->getUniqueInteger()
2152 .getZExtValue());
2153 bool Weak =
2154 static_cast<bool>(cast<ConstantAsMetadata>(AliasMD->getOperand(3))
2155 ->getValue()
2156 ->getUniqueInteger()
2157 .getZExtValue());
2159 auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee));
2160 Alias->setVisibility(Visibility);
2161 if (Weak)
2162 Alias->setLinkage(GlobalValue::WeakAnyLinkage);
2164 if (auto *F = M.getFunction(AliasName)) {
2165 Alias->takeName(F);
2166 F->replaceAllUsesWith(Alias);
2167 F->eraseFromParent();
2168 } else {
2169 Alias->setName(AliasName);
2175 // Emit .symver directives for exported functions, if they exist.
2176 if (ExportSummary) {
2177 if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) {
2178 for (auto Symver : SymversMD->operands()) {
2179 assert(Symver->getNumOperands() >= 2);
2180 StringRef SymbolName =
2181 cast<MDString>(Symver->getOperand(0))->getString();
2182 StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString();
2184 if (!ExportedFunctions.count(SymbolName))
2185 continue;
2187 M.appendModuleInlineAsm(
2188 (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str());
2193 return true;
2196 PreservedAnalyses LowerTypeTestsPass::run(Module &M,
2197 ModuleAnalysisManager &AM) {
2198 bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
2199 if (!Changed)
2200 return PreservedAnalyses::all();
2201 return PreservedAnalyses::none();