//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the IRTranslator class.
///
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/MemoryOpRemark.h"

#define DEBUG_TYPE "irtranslator"
static cl::opt<bool>
    EnableCSEInIRTranslator("enable-cse-in-irtranslator",
                            cl::desc("Should enable CSE in irtranslator"),
                            cl::Optional, cl::init(false));

char IRTranslator::ID = 0;

INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                    false, false)
static void reportTranslationError(MachineFunction &MF,
                                   const TargetPassConfig &TPC,
                                   OptimizationRemarkEmitter &ORE,
                                   OptimizationRemarkMissed &R) {
  MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);

  // Print the function name explicitly if we don't have a debug location
  // (which makes the diagnostic less useful) or if we're going to emit a raw
  // error.
  if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
    R << (" (in function: " + MF.getName() + ")").str();

  if (TPC.isGlobalISelAbortEnabled())
    report_fatal_error(Twine(R.getMsg()));
  else
    ORE.emit(R);
}
IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
    : MachineFunctionPass(ID), OptLevel(optlevel) {}
#ifndef NDEBUG
namespace {
/// Verify that every instruction created has the same DILocation as the
/// instruction being translated.
class DILocationVerifier : public GISelChangeObserver {
  const Instruction *CurrInst = nullptr;

public:
  DILocationVerifier() = default;
  ~DILocationVerifier() = default;

  const Instruction *getCurrentInst() const { return CurrInst; }
  void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }

  void erasingInstr(MachineInstr &MI) override {}
  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {}

  void createdInstr(MachineInstr &MI) override {
    assert(getCurrentInst() && "Inserted instruction without a current MI");

    // Only print the check message if we're actually checking it.
    LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
                      << " was copied to " << MI);

    // We allow insts in the entry block to have a debug loc line of 0 because
    // they could have originated from constants, and we don't want a jumpy
    // debug experience.
    assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
            MI.getDebugLoc().getLine() == 0) &&
           "Line info was not transferred to all instructions");
  }
};
} // namespace
#endif // ifndef NDEBUG
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<StackProtector>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  if (OptLevel != CodeGenOpt::None)
    AU.addRequired<BranchProbabilityInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addPreserved<TargetLibraryInfoWrapperPass>();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
  auto VRegsIt = VMap.findVRegs(Val);
  if (VRegsIt != VMap.vregs_end())
    return *VRegsIt->second;
  auto *Regs = VMap.getVRegs(Val);
  auto *Offsets = VMap.getOffsets(Val);
  SmallVector<LLT, 4> SplitTys;
  computeValueLLTs(*DL, *Val.getType(), SplitTys,
                   Offsets->empty() ? Offsets : nullptr);
  for (unsigned i = 0; i < SplitTys.size(); ++i)
    Regs->push_back(0);
  return *Regs;
}
ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
  auto VRegsIt = VMap.findVRegs(Val);
  if (VRegsIt != VMap.vregs_end())
    return *VRegsIt->second;

  if (Val.getType()->isVoidTy())
    return *VMap.getVRegs(Val);

  // Create entry for this type.
  auto *VRegs = VMap.getVRegs(Val);
  auto *Offsets = VMap.getOffsets(Val);

  assert(Val.getType()->isSized() &&
         "Don't know how to create an empty vreg");

  SmallVector<LLT, 4> SplitTys;
  computeValueLLTs(*DL, *Val.getType(), SplitTys,
                   Offsets->empty() ? Offsets : nullptr);

  if (!isa<Constant>(Val)) {
    for (auto Ty : SplitTys)
      VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
    return *VRegs;
  }

  if (Val.getType()->isAggregateType()) {
    // UndefValue, ConstantAggregateZero
    auto &C = cast<Constant>(Val);
    unsigned Idx = 0;
    while (auto Elt = C.getAggregateElement(Idx++)) {
      auto EltRegs = getOrCreateVRegs(*Elt);
      llvm::copy(EltRegs, std::back_inserter(*VRegs));
    }
  } else {
    assert(SplitTys.size() == 1 && "unexpectedly split LLT");
    VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
    bool Success = translate(cast<Constant>(Val), VRegs->front());
    if (!Success) {
      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                 MF->getFunction().getSubprogram(),
                                 &MF->getFunction().getEntryBlock());
      R << "unable to translate constant: " << ore::NV("Type", Val.getType());
      reportTranslationError(*MF, *TPC, *ORE, R);
      return *VRegs;
    }
  }

  return *VRegs;
}
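
// Illustrative sketch (not part of the original source): how an aggregate IR
// value is expected to map onto virtual registers. Assuming computeValueLLTs
// splits a value of type {i32, i64} into SplitTys = {s32, s64} with bit
// offsets {0, 64}, getOrCreateVRegs hands back one generic vreg per leaf
// type, e.g. %lo:_(s32) and %hi:_(s64). Scalars keep a single vreg and void
// values map to an empty list.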
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
  auto MapEntry = FrameIndices.find(&AI);
  if (MapEntry != FrameIndices.end())
    return MapEntry->second;

  uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
  uint64_t Size =
      ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();

  // Always allocate at least one byte.
  Size = std::max<uint64_t>(Size, 1u);

  int &FI = FrameIndices[&AI];
  FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
  return FI;
}
Align IRTranslator::getMemOpAlign(const Instruction &I) {
  if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
    return SI->getAlign();
  if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
    return LI->getAlign();
  if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I))
    return AI->getAlign();
  if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I))
    return AI->getAlign();

  OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
  R << "unable to translate memop: " << ore::NV("Opcode", &I);
  reportTranslationError(*MF, *TPC, *ORE, R);
  return Align(1);
}
MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
  MachineBasicBlock *&MBB = BBToMBB[&BB];
  assert(MBB && "BasicBlock was not encountered before");
  return *MBB;
}

void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
  assert(NewPred && "new predecessor must be a real MachineBasicBlock");
  MachinePreds[Edge].push_back(NewPred);
}
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
                                     MachineIRBuilder &MIRBuilder) {
  // Get or create a virtual register for each value.
  // Unless the value is a Constant => loadimm cst?
  // or inline constant each time?
  // Creation of a virtual register needs to have a size.
  Register Op0 = getOrCreateVReg(*U.getOperand(0));
  Register Op1 = getOrCreateVReg(*U.getOperand(1));
  Register Res = getOrCreateVReg(U);
  uint16_t Flags = 0;
  if (isa<Instruction>(U)) {
    const Instruction &I = cast<Instruction>(U);
    Flags = MachineInstr::copyFlagsFromInstruction(I);
  }

  MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
  return true;
}

bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  Register Op0 = getOrCreateVReg(*U.getOperand(0));
  Register Res = getOrCreateVReg(U);
  uint16_t Flags = 0;
  if (isa<Instruction>(U)) {
    const Instruction &I = cast<Instruction>(U);
    Flags = MachineInstr::copyFlagsFromInstruction(I);
  }
  MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
  return true;
}

bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
  return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
}
bool IRTranslator::translateCompare(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  auto *CI = dyn_cast<CmpInst>(&U);
  Register Op0 = getOrCreateVReg(*U.getOperand(0));
  Register Op1 = getOrCreateVReg(*U.getOperand(1));
  Register Res = getOrCreateVReg(U);
  CmpInst::Predicate Pred =
      CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
                                    cast<ConstantExpr>(U).getPredicate());
  if (CmpInst::isIntPredicate(Pred))
    MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
  else if (Pred == CmpInst::FCMP_FALSE)
    MIRBuilder.buildCopy(
        Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
  else if (Pred == CmpInst::FCMP_TRUE)
    MIRBuilder.buildCopy(
        Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
  else {
    uint16_t Flags = 0;
    if (CI)
      Flags = MachineInstr::copyFlagsFromInstruction(*CI);
    MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
  }

  return true;
}
bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
  const ReturnInst &RI = cast<ReturnInst>(U);
  const Value *Ret = RI.getReturnValue();
  if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
    Ret = nullptr;

  ArrayRef<Register> VRegs;
  if (Ret)
    VRegs = getOrCreateVRegs(*Ret);

  Register SwiftErrorVReg = 0;
  if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
    SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
        &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
  }

  // The target may mess up with the insertion point, but
  // this is not important as a return is the last instruction
  // of the block anyway.
  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
}
void IRTranslator::emitBranchForMergedCondition(
    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
    BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
  // If the leaf of the tree is a comparison, merge the condition into the
  // case block.
  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
    CmpInst::Predicate Condition;
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
      Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
    } else {
      const FCmpInst *FC = cast<FCmpInst>(Cond);
      Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
    }

    SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
                           BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
                           CurBuilder->getDebugLoc(), TProb, FProb);
    SL->SwitchCases.push_back(CB);
    return;
  }

  // Create a CaseBlock record representing this branch.
  CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  SwitchCG::CaseBlock CB(
      Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
      nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
  SL->SwitchCases.push_back(CB);
}
static bool isValInBlock(const Value *V, const BasicBlock *BB) {
  if (const Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() == BB;
  return true;
}
void IRTranslator::findMergedConditions(
    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
    Instruction::BinaryOps Opc, BranchProbability TProb,
    BranchProbability FProb, bool InvertCond) {
  using namespace PatternMatch;
  assert((Opc == Instruction::And || Opc == Instruction::Or) &&
         "Expected Opc to be AND/OR");
  // Skip over nots that are not part of the tree and remember to invert op
  // and operands at the next level.
  Value *NotCond;
  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
      isValInBlock(NotCond, CurBB->getBasicBlock())) {
    findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
                         !InvertCond);
    return;
  }

  const Instruction *BOp = dyn_cast<Instruction>(Cond);
  const Value *BOpOp0, *BOpOp1;
  // Compute the effective opcode for Cond, taking into account whether it
  // needs to be inverted, e.g.
  //   and (not (or A, B)), C
  // gets lowered as
  //   and (and (not A, not B), C)
  Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
  if (BOp) {
    BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
               ? Instruction::And
               : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
                      ? Instruction::Or
                      : (Instruction::BinaryOps)0);
    if (InvertCond) {
      if (BOpc == Instruction::And)
        BOpc = Instruction::Or;
      else if (BOpc == Instruction::Or)
        BOpc = Instruction::And;
    }
  }

  // If this node is not part of the or/and tree, emit it as a branch.
  // Note that all nodes in the tree should have same opcode.
  bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
  if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
      !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
      !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
    emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
                                 InvertCond);
    return;
  }

  // Create TmpBB after CurBB.
  MachineFunction::iterator BBI(CurBB);
  MachineBasicBlock *TmpBB =
      MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
  CurBB->getParent()->insert(++BBI, TmpBB);

  if (Opc == Instruction::Or) {
    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
    //     = TrueProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
    // A/(1+B) and 2B/(1+B). This choice assumes that
    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
    // TmpBB, but the math is more complicated.

    auto NewTrueProb = TProb / 2;
    auto NewFalseProb = TProb / 2 + FProb;
    // Emit the LHS condition.
    findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
                         NewFalseProb, InvertCond);

    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
                         Probs[1], InvertCond);
  } else {
    assert(Opc == Instruction::And && "Unknown merge op!");
    // This requires creation of TmpBB after CurBB.

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
    //     = FalseProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
    // TrueProb for BB1 * FalseProb for TmpBB.

    auto NewTrueProb = TProb + FProb / 2;
    auto NewFalseProb = FProb / 2;
    // Emit the LHS condition.
    findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
                         NewFalseProb, InvertCond);

    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
                         Probs[1], InvertCond);
  }
}
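
// Worked example (illustrative, not part of the original source): lowering
// "br (or A-cond, B-cond)" with original edge probabilities TProb = A = 0.6
// and FProb = B = 0.4. The first block gets {A/2, A/2 + B} = {0.3, 0.7};
// TmpBB gets {A/2, B} = {0.3, 0.4}, normalized to {3/7, 4/7}, i.e. A/(1+B)
// and 2B/(1+B). The stated requirement then holds:
//   0.3 + 0.7 * (3/7) = 0.6 = TrueProb of the original block.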
bool IRTranslator::shouldEmitAsBranches(
    const std::vector<SwitchCG::CaseBlock> &Cases) {
  // For multiple cases, it's better to emit as branches.
  if (Cases.size() != 2)
    return true;

  // If this is two comparisons of the same values or'd or and'd together, they
  // will get folded into a single comparison, so don't emit two blocks.
  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
    return false;
  }

  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
      Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
      isa<Constant>(Cases[0].CmpRHS) &&
      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
        Cases[0].TrueBB == Cases[1].ThisBB)
      return false;
    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
        Cases[0].FalseBB == Cases[1].ThisBB)
      return false;
  }

  return true;
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
  const BranchInst &BrInst = cast<BranchInst>(U);
  auto &CurMBB = MIRBuilder.getMBB();
  auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));

  if (BrInst.isUnconditional()) {
    // If the unconditional target is the layout successor, fallthrough.
    if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
      MIRBuilder.buildBr(*Succ0MBB);

    // Link successors.
    for (const BasicBlock *Succ : successors(&BrInst))
      CurMBB.addSuccessor(&getMBB(*Succ));
    return true;
  }

  // If this condition is one of the special cases we handle, do special stuff
  // now.
  const Value *CondVal = BrInst.getCondition();
  MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));

  const auto &TLI = *MF->getSubtarget().getTargetLowering();

  // If this is a series of conditions that are or'd or and'd together, emit
  // this as a sequence of branches instead of setcc's with and/or operations.
  // As long as jumps are not expensive (exceptions for multi-use logic ops,
  // unpredictable branches, and vector extracts because those jumps are likely
  // expensive for any target), this should improve performance.
  // For example, instead of something like:
  using namespace PatternMatch;
  const Instruction *CondI = dyn_cast<Instruction>(CondVal);
  if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
      !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
    Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
    Value *Vec;
    const Value *BOp0, *BOp1;
    if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
      Opcode = Instruction::And;
    else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
      Opcode = Instruction::Or;

    if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
                    match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
      findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
                           getEdgeProbability(&CurMBB, Succ0MBB),
                           getEdgeProbability(&CurMBB, Succ1MBB),
                           /*InvertCond=*/false);
      assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");

      // Allow some cases to be rejected.
      if (shouldEmitAsBranches(SL->SwitchCases)) {
        // Emit the branch for this block.
        emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
        SL->SwitchCases.erase(SL->SwitchCases.begin());
        return true;
      }

      // Okay, we decided not to do this, remove any inserted MBB's and clear
      // SwitchCases.
      for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
        MF->erase(SL->SwitchCases[I].ThisBB);

      SL->SwitchCases.clear();
    }
  }

  // Create a CaseBlock record representing this branch.
  SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
                         ConstantInt::getTrue(MF->getFunction().getContext()),
                         nullptr, Succ0MBB, Succ1MBB, &CurMBB,
                         CurBuilder->getDebugLoc());

  // Use emitSwitchCase to actually insert the fast branch sequence for this
  // cond branch.
  emitSwitchCase(CB, &CurMBB, *CurBuilder);
  return true;
}
void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
                                        MachineBasicBlock *Dst,
                                        BranchProbability Prob) {
  if (!FuncInfo.BPI) {
    Src->addSuccessorWithoutProb(Dst);
    return;
  }
  if (Prob.isUnknown())
    Prob = getEdgeProbability(Src, Dst);
  Src->addSuccessor(Dst, Prob);
}

BranchProbability
IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
                                 const MachineBasicBlock *Dst) const {
  const BasicBlock *SrcBB = Src->getBasicBlock();
  const BasicBlock *DstBB = Dst->getBasicBlock();
  if (!FuncInfo.BPI) {
    // If BPI is not available, set the default probability as 1 / N, where N
    // is the number of successors.
    auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
    return BranchProbability(1, SuccSize);
  }
  return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
}
bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
  using namespace SwitchCG;
  // Extract cases from the switch.
  const SwitchInst &SI = cast<SwitchInst>(U);
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  CaseClusterVector Clusters;
  Clusters.reserve(SI.getNumCases());
  for (auto &I : SI.cases()) {
    MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
    assert(Succ && "Could not find successor mbb in mapping");
    const ConstantInt *CaseVal = I.getCaseValue();
    BranchProbability Prob =
        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
            : BranchProbability(1, SI.getNumCases() + 1);
    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
  }

  MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());

  // Cluster adjacent cases with the same destination. We do this at all
  // optimization levels because it's cheap to do and will make codegen faster
  // if there are many clusters.
  sortAndRangeify(Clusters);

  MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());

  // If there is only the default destination, jump there directly.
  if (Clusters.empty()) {
    SwitchMBB->addSuccessor(DefaultMBB);
    if (DefaultMBB != SwitchMBB->getNextNode())
      MIB.buildBr(*DefaultMBB);
    return true;
  }

  SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
  SL->findBitTestClusters(Clusters, &SI);

  LLVM_DEBUG({
    dbgs() << "Case clusters: ";
    for (const CaseCluster &C : Clusters) {
      if (C.Kind == CC_JumpTable)
        dbgs() << "JT:";
      if (C.Kind == CC_BitTests)
        dbgs() << "BT:";

      C.Low->getValue().print(dbgs(), true);
      if (C.Low != C.High) {
        dbgs() << '-';
        C.High->getValue().print(dbgs(), true);
      }
      dbgs() << ' ';
    }
    dbgs() << '\n';
  });

  assert(!Clusters.empty());
  SwitchWorkList WorkList;
  CaseClusterIt First = Clusters.begin();
  CaseClusterIt Last = Clusters.end() - 1;
  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});

  // FIXME: At the moment we don't do any splitting optimizations here like
  // SelectionDAG does, so this worklist only has one entry.
  while (!WorkList.empty()) {
    SwitchWorkListItem W = WorkList.pop_back_val();
    if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
      return false;
  }
  return true;
}
void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
                                 MachineBasicBlock *MBB) {
  // Emit the code for the jump table
  assert(JT.Reg != -1U && "Should lower JT Header first!");
  MachineIRBuilder MIB(*MBB->getParent());
  MIB.setMBB(*MBB);
  MIB.setDebugLoc(CurBuilder->getDebugLoc());

  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);

  auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
  MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
}
bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
                                       SwitchCG::JumpTableHeader &JTH,
                                       MachineBasicBlock *HeaderBB) {
  MachineIRBuilder MIB(*HeaderBB->getParent());
  MIB.setMBB(*HeaderBB);
  MIB.setDebugLoc(CurBuilder->getDebugLoc());

  const Value &SValue = *JTH.SValue;
  // Subtract the lowest switch case value from the value being switched on.
  const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
  Register SwitchOpReg = getOrCreateVReg(SValue);
  auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
  auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);

  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  Type *PtrIRTy = SValue.getType()->getPointerTo();
  const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
  Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);

  JT.Reg = Sub.getReg(0);

  if (JTH.FallthroughUnreachable) {
    if (JT.MBB != HeaderBB->getNextNode())
      MIB.buildBr(*JT.MBB);
    return true;
  }

  // Emit the range check for the jump table, and branch to the default block
  // for the switch statement if the value being switched on exceeds the
  // largest case in the switch.
  auto Cst = getOrCreateVReg(
      *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
  Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
  auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);

  auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);

  // Avoid emitting unnecessary branches to the next block.
  if (JT.MBB != HeaderBB->getNextNode())
    BrCond = MIB.buildBr(*JT.MBB);
  return true;
}
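
// Illustrative sketch of the header MIR this is expected to produce (register
// names and bit widths are made up for the example):
//   %sub:_(s32) = G_SUB %switchval, %first           ; bias to zero
//   %idx:_(s64) = G_ZEXT %sub(s32)                   ; match pointer width
//   %cmp:_(s1)  = G_ICMP intpred(ugt), %idx, %range
//   G_BRCOND %cmp(s1), %bb.default                   ; out of range -> default
//   G_BR %bb.jumptable                               ; unless layout successor
// emitJumpTable then emits G_JUMP_TABLE + G_BRJT in the jump table block.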
void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
                                  MachineBasicBlock *SwitchBB,
                                  MachineIRBuilder &MIB) {
  Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
  Register Cond;
  DebugLoc OldDbgLoc = MIB.getDebugLoc();
  MIB.setDebugLoc(CB.DbgLoc);
  MIB.setMBB(*CB.ThisBB);

  if (CB.PredInfo.NoCmp) {
    // Branch or fall through to TrueBB.
    addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
                      CB.ThisBB);
    CB.ThisBB->normalizeSuccProbs();
    if (CB.TrueBB != CB.ThisBB->getNextNode())
      MIB.buildBr(*CB.TrueBB);
    MIB.setDebugLoc(OldDbgLoc);
    return;
  }

  const LLT i1Ty = LLT::scalar(1);
  // Build the compare.
  if (!CB.CmpMHS) {
    const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
    // For conditional branch lowering, we might try to do something silly like
    // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
    // just re-use the existing condition vreg.
    if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
        CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
      Cond = CondLHS;
    } else {
      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
      if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
        Cond =
            MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
      else
        Cond =
            MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
    }
  } else {
    assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
           "Can only handle SLE ranges");

    const APInt &Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
    const APInt &High = cast<ConstantInt>(CB.CmpRHS)->getValue();

    Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
      Cond =
          MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
    } else {
      const LLT CmpTy = MRI->getType(CmpOpReg);
      auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
      auto Diff = MIB.buildConstant(CmpTy, High - Low);
      Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
    }
  }

  // Update successor info
  addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);

  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
                    CB.ThisBB);

  // TrueBB and FalseBB are always different unless the incoming IR is
  // degenerate. This only happens when running llc on weird IR.
  if (CB.TrueBB != CB.FalseBB)
    addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
  CB.ThisBB->normalizeSuccProbs();

  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
                    CB.ThisBB);

  MIB.buildBrCond(Cond, *CB.TrueBB);
  MIB.buildBr(*CB.FalseBB);
  MIB.setDebugLoc(OldDbgLoc);
}
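
// Illustrative sketch (hypothetical values): a range case "5 <= %x <= 9" with
// a non-minimal lower bound is rewritten as a single unsigned check:
//   %sub:_(s32) = G_SUB %x, 5
//   %cmp:_(s1)  = G_ICMP intpred(ule), %sub, 4       ; 4 == High - Low
//   G_BRCOND %cmp(s1), %bb.true
//   G_BR %bb.false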
bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
                                          MachineBasicBlock *SwitchMBB,
                                          MachineBasicBlock *CurMBB,
                                          MachineBasicBlock *DefaultMBB,
                                          MachineIRBuilder &MIB,
                                          MachineFunction::iterator BBI,
                                          BranchProbability UnhandledProbs,
                                          SwitchCG::CaseClusterIt I,
                                          MachineBasicBlock *Fallthrough,
                                          bool FallthroughUnreachable) {
  using namespace SwitchCG;
  MachineFunction *CurMF = SwitchMBB->getParent();
  // FIXME: Optimize away range check based on pivot comparisons.
  JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
  SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
  BranchProbability DefaultProb = W.DefaultProb;

  // The jump block hasn't been inserted yet; insert it here.
  MachineBasicBlock *JumpMBB = JT->MBB;
  CurMF->insert(BBI, JumpMBB);

  // Since the jump table block is separate from the switch block, we need
  // to keep track of it as a machine predecessor to the default block,
  // otherwise we lose the phi edges.
  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
                    CurMBB);
  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
                    JumpMBB);

  auto JumpProb = I->Prob;
  auto FallthroughProb = UnhandledProbs;

  // If the default statement is a target of the jump table, we evenly
  // distribute the default probability to successors of CurMBB. Also
  // update the probability on the edge from JumpMBB to Fallthrough.
  for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
                                        SE = JumpMBB->succ_end();
       SI != SE; ++SI) {
    if (*SI == DefaultMBB) {
      JumpProb += DefaultProb / 2;
      FallthroughProb -= DefaultProb / 2;
      JumpMBB->setSuccProbability(SI, DefaultProb / 2);
      JumpMBB->normalizeSuccProbs();
    } else {
      // Also record edges from the jump table block to its successors.
      addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
                        JumpMBB);
    }
  }

  if (FallthroughUnreachable)
    JTH->FallthroughUnreachable = true;

  if (!JTH->FallthroughUnreachable)
    addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
  addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
  CurMBB->normalizeSuccProbs();

  // The jump table header will be inserted in our current block, do the
  // range check, and fall through to our fallthrough block.
  JTH->HeaderBB = CurMBB;
  JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.

  // If we're in the right place, emit the jump table header right now.
  if (CurMBB == SwitchMBB) {
    if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
      return false;
    JTH->Emitted = true;
  }
  return true;
}
bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
                                            Value *Cond,
                                            MachineBasicBlock *Fallthrough,
                                            bool FallthroughUnreachable,
                                            BranchProbability UnhandledProbs,
                                            MachineBasicBlock *CurMBB,
                                            MachineIRBuilder &MIB,
                                            MachineBasicBlock *SwitchMBB) {
  using namespace SwitchCG;
  const Value *RHS, *LHS, *MHS;
  CmpInst::Predicate Pred;
  if (I->Low == I->High) {
    // Check Cond == I->Low.
    Pred = CmpInst::ICMP_EQ;
    LHS = Cond;
    RHS = I->Low;
    MHS = nullptr;
  } else {
    // Check I->Low <= Cond <= I->High.
    Pred = CmpInst::ICMP_SLE;
    LHS = I->Low;
    MHS = Cond;
    RHS = I->High;
  }

  // If Fallthrough is unreachable, fold away the comparison.
  // The false probability is the sum of all unhandled cases.
  CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
               CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);

  emitSwitchCase(CB, SwitchMBB, MIB);
  return true;
}
void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
                                     MachineBasicBlock *SwitchBB) {
  MachineIRBuilder &MIB = *CurBuilder;
  MIB.setMBB(*SwitchBB);

  // Subtract the minimum value.
  Register SwitchOpReg = getOrCreateVReg(*B.SValue);

  LLT SwitchOpTy = MRI->getType(SwitchOpReg);
  Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
  auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);

  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);

  LLT MaskTy = SwitchOpTy;
  if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
      !isPowerOf2_32(MaskTy.getSizeInBits()))
    MaskTy = LLT::scalar(PtrTy.getSizeInBits());
  else {
    // Ensure that the type will fit the mask value.
    for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
      if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
        // Switch table case range are encoded into series of masks.
        // Just use pointer type, it's guaranteed to fit.
        MaskTy = LLT::scalar(PtrTy.getSizeInBits());
        break;
      }
    }
  }
  Register SubReg = RangeSub.getReg(0);
  if (SwitchOpTy != MaskTy)
    SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);

  B.RegVT = getMVTForLLT(MaskTy);
  B.Reg = SubReg;

  MachineBasicBlock *MBB = B.Cases[0].ThisBB;

  if (!B.FallthroughUnreachable)
    addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
  addSuccessorWithProb(SwitchBB, MBB, B.Prob);

  SwitchBB->normalizeSuccProbs();

  if (!B.FallthroughUnreachable) {
    // Conditional branch to the default block.
    auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
    auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
                                  RangeSub, RangeCst);
    MIB.buildBrCond(RangeCmp, *B.Default);
  }

  // Avoid emitting unnecessary branches to the next block.
  if (MBB != SwitchBB->getNextNode())
    MIB.buildBr(*MBB);
}
void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
                                   MachineBasicBlock *NextMBB,
                                   BranchProbability BranchProbToNext,
                                   Register Reg, SwitchCG::BitTestCase &B,
                                   MachineBasicBlock *SwitchBB) {
  MachineIRBuilder &MIB = *CurBuilder;
  MIB.setMBB(*SwitchBB);

  LLT SwitchTy = getLLTForMVT(BB.RegVT);
  Register Cmp;
  unsigned PopCount = countPopulation(B.Mask);
  if (PopCount == 1) {
    // Testing for a single bit; just compare the shift count with what it
    // would need to be to shift a 1 bit in that position.
    auto MaskTrailingZeros =
        MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
    Cmp =
        MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
            .getReg(0);
  } else if (PopCount == BB.Range) {
    // There is only one zero bit in the range, test for it directly.
    auto MaskTrailingOnes =
        MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
              .getReg(0);
  } else {
    // Make desired shift.
    auto CstOne = MIB.buildConstant(SwitchTy, 1);
    auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);

    // Emit bit tests and jumps.
    auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
    auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
    auto CstZero = MIB.buildConstant(SwitchTy, 0);
    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
              .getReg(0);
  }

  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
  // one as they are relative probabilities (and thus work more like weights),
  // and hence we need to normalize them to let the sum of them become one.
  SwitchBB->normalizeSuccProbs();

  // Record the fact that the IR edge from the header to the bit test target
  // will go through our new block. Needed for PHIs to have nodes added.
  addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
                    SwitchBB);

  MIB.buildBrCond(Cmp, *B.TargetBB);

  // Avoid emitting unnecessary branches to the next block.
  if (NextMBB != SwitchBB->getNextNode())
    MIB.buildBr(*NextMBB);
}
bool IRTranslator::lowerBitTestWorkItem(
    SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
    MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
    MachineIRBuilder &MIB, MachineFunction::iterator BBI,
    BranchProbability DefaultProb, BranchProbability UnhandledProbs,
    SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
    bool FallthroughUnreachable) {
  using namespace SwitchCG;
  MachineFunction *CurMF = SwitchMBB->getParent();
  // FIXME: Optimize away range check based on pivot comparisons.
  BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
  // The bit test blocks haven't been inserted yet; insert them here.
  for (BitTestCase &BTC : BTB->Cases)
    CurMF->insert(BBI, BTC.ThisBB);

  // Fill in fields of the BitTestBlock.
  BTB->Parent = CurMBB;
  BTB->Default = Fallthrough;

  BTB->DefaultProb = UnhandledProbs;
  // If the cases in bit test don't form a contiguous range, we evenly
  // distribute the probability on the edge to Fallthrough to two
  // successors of CurMBB.
  if (!BTB->ContiguousRange) {
    BTB->Prob += DefaultProb / 2;
    BTB->DefaultProb -= DefaultProb / 2;
  }

  if (FallthroughUnreachable)
    BTB->FallthroughUnreachable = true;

  // If we're in the right place, emit the bit test header right now.
  if (CurMBB == SwitchMBB) {
    emitBitTestHeader(*BTB, SwitchMBB);
    BTB->Emitted = true;
  }
  return true;
}
bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
                                       Value *Cond,
                                       MachineBasicBlock *SwitchMBB,
                                       MachineBasicBlock *DefaultMBB,
                                       MachineIRBuilder &MIB) {
  using namespace SwitchCG;
  MachineFunction *CurMF = FuncInfo.MF;
  MachineBasicBlock *NextMBB = nullptr;
  MachineFunction::iterator BBI(W.MBB);
  if (++BBI != FuncInfo.MF->end())
    NextMBB = &*BBI;

  // Here, we order cases by probability so the most likely case will be
  // checked first. However, two clusters can have the same probability in
  // which case their relative ordering is non-deterministic. So we use Low
  // as a tie-breaker as clusters are guaranteed to never overlap.
  llvm::sort(W.FirstCluster, W.LastCluster + 1,
             [](const CaseCluster &a, const CaseCluster &b) {
               return a.Prob != b.Prob
                          ? a.Prob > b.Prob
                          : a.Low->getValue().slt(b.Low->getValue());
             });

  // Rearrange the case blocks so that the last one falls through if possible
  // without changing the order of probabilities.
  for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
    --I;
    if (I->Prob > W.LastCluster->Prob)
      break;
    if (I->Kind == CC_Range && I->MBB == NextMBB) {
      std::swap(*I, *W.LastCluster);
      break;
    }
  }

  // Compute total probability.
  BranchProbability DefaultProb = W.DefaultProb;
  BranchProbability UnhandledProbs = DefaultProb;
  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
    UnhandledProbs += I->Prob;

  MachineBasicBlock *CurMBB = W.MBB;
  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
    bool FallthroughUnreachable = false;
    MachineBasicBlock *Fallthrough;
    if (I == W.LastCluster) {
      // For the last cluster, fall through to the default destination.
      Fallthrough = DefaultMBB;
      FallthroughUnreachable = isa<UnreachableInst>(
          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
    } else {
      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
      CurMF->insert(BBI, Fallthrough);
    }
    UnhandledProbs -= I->Prob;

    switch (I->Kind) {
    case CC_BitTests: {
      if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
                                DefaultProb, UnhandledProbs, I, Fallthrough,
                                FallthroughUnreachable)) {
        LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
        return false;
      }
      break;
    }

    case CC_JumpTable: {
      if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
                                  UnhandledProbs, I, Fallthrough,
                                  FallthroughUnreachable)) {
        LLVM_DEBUG(dbgs() << "Failed to lower jump table");
        return false;
      }
      break;
    }
    case CC_Range: {
      if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
                                    FallthroughUnreachable, UnhandledProbs,
                                    CurMBB, MIB, SwitchMBB)) {
        LLVM_DEBUG(dbgs() << "Failed to lower switch range");
        return false;
      }
      break;
    }
    }
    CurMBB = Fallthrough;
  }

  return true;
}
bool IRTranslator::translateIndirectBr(const User &U,
                                       MachineIRBuilder &MIRBuilder) {
  const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);

  const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
  MIRBuilder.buildBrIndirect(Tgt);

  // Link successors.
  SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
  for (const BasicBlock *Succ : successors(&BrInst)) {
    // It's legal for indirectbr instructions to have duplicate blocks in the
    // destination list. We don't allow this in MIR. Skip anything that's
    // already a successor.
    if (!AddedSuccessors.insert(Succ).second)
      continue;
    CurBB.addSuccessor(&getMBB(*Succ));
  }

  return true;
}
static bool isSwiftError(const Value *V) {
  if (auto Arg = dyn_cast<Argument>(V))
    return Arg->hasSwiftErrorAttr();
  if (auto AI = dyn_cast<AllocaInst>(V))
    return AI->isSwiftError();
  return false;
}
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
  const LoadInst &LI = cast<LoadInst>(U);
  if (DL->getTypeStoreSize(LI.getType()) == 0)
    return true;

  ArrayRef<Register> Regs = getOrCreateVRegs(LI);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
  Register Base = getOrCreateVReg(*LI.getPointerOperand());

  Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

  if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
    assert(Regs.size() == 1 && "swifterror should be single pointer");
    Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
                                                    LI.getPointerOperand());
    MIRBuilder.buildCopy(Regs[0], VReg);
    return true;
  }

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);

  const MDNode *Ranges =
      Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);

    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
    Align BaseAlign = getMemOpAlign(LI);
    auto MMO = MF->getMachineMemOperand(
        Ptr, Flags, MRI->getType(Regs[i]),
        commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges,
        LI.getSyncScopeID(), LI.getOrdering());
    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
  }

  return true;
}
bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
  const StoreInst &SI = cast<StoreInst>(U);
  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
    return true;

  ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
  Register Base = getOrCreateVReg(*SI.getPointerOperand());

  Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

  if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
    assert(Vals.size() == 1 && "swifterror should be single pointer");

    Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
                                                    SI.getPointerOperand());
    MIRBuilder.buildCopy(VReg, Vals[0]);
    return true;
  }

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);

  for (unsigned i = 0; i < Vals.size(); ++i) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);

    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
    Align BaseAlign = getMemOpAlign(SI);
    auto MMO = MF->getMachineMemOperand(
        Ptr, Flags, MRI->getType(Vals[i]),
        commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr,
        SI.getSyncScopeID(), SI.getOrdering());
    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
  }

  return true;
}
static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
  const Value *Src = U.getOperand(0);
  Type *Int32Ty = Type::getInt32Ty(U.getContext());

  // getIndexedOffsetInType is designed for GEPs, so the first index is the
  // usual array element rather than looking into the actual aggregate.
  SmallVector<Value *, 1> Indices;
  Indices.push_back(ConstantInt::get(Int32Ty, 0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
    for (auto Idx : EVI->indices())
      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
    for (auto Idx : IVI->indices())
      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
  } else {
    for (unsigned i = 1; i < U.getNumOperands(); ++i)
      Indices.push_back(U.getOperand(i));
  }

  return 8 * static_cast<uint64_t>(
                 DL.getIndexedOffsetInType(Src->getType(), Indices));
}
bool IRTranslator::translateExtractValue(const User &U,
                                         MachineIRBuilder &MIRBuilder) {
  const Value *Src = U.getOperand(0);
  uint64_t Offset = getOffsetFromIndices(U, *DL);
  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
  unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
  auto &DstRegs = allocateVRegs(U);

  for (unsigned i = 0; i < DstRegs.size(); ++i)
    DstRegs[i] = SrcRegs[Idx++];

  return true;
}

bool IRTranslator::translateInsertValue(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
  const Value *Src = U.getOperand(0);
  uint64_t Offset = getOffsetFromIndices(U, *DL);
  auto &DstRegs = allocateVRegs(U);
  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
  ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
  auto InsertedIt = InsertedRegs.begin();

  for (unsigned i = 0; i < DstRegs.size(); ++i) {
    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
      DstRegs[i] = *InsertedIt++;
    else
      DstRegs[i] = SrcRegs[i];
  }

  return true;
}
bool IRTranslator::translateSelect(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  Register Tst = getOrCreateVReg(*U.getOperand(0));
  ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
  ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
  ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));

  uint16_t Flags = 0;
  if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
    Flags = MachineInstr::copyFlagsFromInstruction(*SI);

  for (unsigned i = 0; i < ResRegs.size(); ++i) {
    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
  }

  return true;
}
bool IRTranslator::translateCopy(const User &U, const Value &V,
                                 MachineIRBuilder &MIRBuilder) {
  Register Src = getOrCreateVReg(V);
  auto &Regs = *VMap.getVRegs(U);
  if (Regs.empty()) {
    Regs.push_back(Src);
    VMap.getOffsets(U)->push_back(0);
  } else {
    // If we already assigned a vreg for this instruction, we can't change
    // that. Emit a copy to satisfy the users we already emitted.
    MIRBuilder.buildCopy(Regs[0], Src);
  }
  return true;
}
bool IRTranslator::translateBitCast(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  // If we're bitcasting to the source type, we can reuse the source vreg.
  if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
      getLLTForType(*U.getType(), *DL))
    return translateCopy(U, *U.getOperand(0), MIRBuilder);

  return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}

bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                 MachineIRBuilder &MIRBuilder) {
  Register Op = getOrCreateVReg(*U.getOperand(0));
  Register Res = getOrCreateVReg(U);
  MIRBuilder.buildInstr(Opcode, {Res}, {Op});
  return true;
}
bool IRTranslator::translateGetElementPtr(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  Value &Op0 = *U.getOperand(0);
  Register BaseReg = getOrCreateVReg(Op0);
  Type *PtrIRTy = Op0.getType();
  LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
  Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

  // Normalize Vector GEP - all scalar operands should be converted to the
  // splat vector.
  unsigned VectorWidth = 0;

  // True if we should use a splat vector; using VectorWidth alone is not
  // sufficient.
  bool WantSplatVector = false;
  if (auto *VT = dyn_cast<VectorType>(U.getType())) {
    VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
    // We don't produce 1 x N vectors; those are treated as scalars.
    WantSplatVector = VectorWidth > 1;
  }

  // We might need to splat the base pointer into a vector if the offsets
  // are vectors.
  if (WantSplatVector && !PtrTy.isVector()) {
    BaseReg =
        MIRBuilder
            .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg)
            .getReg(0);
    PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
    PtrTy = getLLTForType(*PtrIRTy, *DL);
    OffsetIRTy = DL->getIntPtrType(PtrIRTy);
    OffsetTy = getLLTForType(*OffsetIRTy, *DL);
  }

  uint64_t Offset = 0;
  for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (StructType *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
      Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
      continue;
    } else {
      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());

      // If this is a scalar constant or a splat vector of constants,
      // handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        Offset += ElementSize * CI->getSExtValue();
        continue;
      }

      if (Offset != 0) {
        auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
        BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
                      .getReg(0);
        Offset = 0;
      }

      Register IdxReg = getOrCreateVReg(*Idx);
      LLT IdxTy = MRI->getType(IdxReg);
      if (IdxTy != OffsetTy) {
        if (!IdxTy.isVector() && WantSplatVector) {
          IdxReg = MIRBuilder.buildSplatVector(
              OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
        }

        IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
      }

      // N = N + Idx * ElementSize;
      // Avoid doing it for ElementSize of 1.
      Register GepOffsetReg;
      if (ElementSize != 1) {
        auto ElementSizeMIB = MIRBuilder.buildConstant(
            getLLTForType(*OffsetIRTy, *DL), ElementSize);
        GepOffsetReg =
            MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
      } else
        GepOffsetReg = IdxReg;

      BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
    }
  }

  if (Offset != 0) {
    auto OffsetMIB =
        MIRBuilder.buildConstant(OffsetTy, Offset);
    MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
    return true;
  }

  MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
  return true;
}
bool IRTranslator::translateMemFunc(const CallInst &CI,
                                    MachineIRBuilder &MIRBuilder,
                                    unsigned Opcode) {
  // If the source is undef, then just emit a nop.
  if (isa<UndefValue>(CI.getArgOperand(1)))
    return true;

  SmallVector<Register, 3> SrcRegs;

  unsigned MinPtrSize = UINT_MAX;
  for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
    Register SrcReg = getOrCreateVReg(**AI);
    LLT SrcTy = MRI->getType(SrcReg);
    if (SrcTy.isPointer())
      MinPtrSize = std::min<unsigned>(SrcTy.getSizeInBits(), MinPtrSize);
    SrcRegs.push_back(SrcReg);
  }

  LLT SizeTy = LLT::scalar(MinPtrSize);

  // The size operand should be the minimum of the pointer sizes.
  Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
  if (MRI->getType(SizeOpReg) != SizeTy)
    SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);

  auto ICall = MIRBuilder.buildInstr(Opcode);
  for (Register SrcReg : SrcRegs)
    ICall.addUse(SrcReg);

  Align DstAlign;
  Align SrcAlign;
  unsigned IsVol =
      cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();

  if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
    DstAlign = MCI->getDestAlign().valueOrOne();
    SrcAlign = MCI->getSourceAlign().valueOrOne();
  } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
    DstAlign = MCI->getDestAlign().valueOrOne();
    SrcAlign = MCI->getSourceAlign().valueOrOne();
  } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
    DstAlign = MMI->getDestAlign().valueOrOne();
    SrcAlign = MMI->getSourceAlign().valueOrOne();
  } else {
    auto *MSI = cast<MemSetInst>(&CI);
    DstAlign = MSI->getDestAlign().valueOrOne();
  }

  if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
    // We need to propagate the tail call flag from the IR inst as an argument.
    // Otherwise, we have to pessimize and assume later that we cannot tail call
    // any memory intrinsics.
    ICall.addImm(CI.isTailCall() ? 1 : 0);
  }

  // Create mem operands to store the alignment and volatile info.
  auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
  ICall.addMemOperand(MF->getMachineMemOperand(
      MachinePointerInfo(CI.getArgOperand(0)),
      MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
  if (Opcode != TargetOpcode::G_MEMSET)
    ICall.addMemOperand(MF->getMachineMemOperand(
        MachinePointerInfo(CI.getArgOperand(1)),
        MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));

  return true;
}
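
// Illustrative sketch (hypothetical operands): a call to
//   llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %len, i1 false)
// is expected to become roughly
//   G_MEMCPY %dst(p0), %src(p0), %len(s64), 0
//     :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
// where the trailing immediate is the tail-call flag and the two memory
// operands carry the alignment and volatility info set up above.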
void IRTranslator::getStackGuard(Register DstReg,
                                 MachineIRBuilder &MIRBuilder) {
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
  auto MIB =
      MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
  if (!Global)
    return;

  unsigned AddrSpace = Global->getType()->getPointerAddressSpace();
  LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace));

  MachinePointerInfo MPInfo(Global);
  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
               MachineMemOperand::MODereferenceable;
  MachineMemOperand *MemRef = MF->getMachineMemOperand(
      MPInfo, Flags, PtrTy, DL->getPointerABIAlignment(AddrSpace));
  MIB.setMemRefs({MemRef});
}
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                              MachineIRBuilder &MIRBuilder) {
  ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
  MIRBuilder.buildInstr(
      Op, {ResRegs[0], ResRegs[1]},
      {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});

  return true;
}
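
// Illustrative example:
//   %r = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
// is expected to lower roughly to
//   %val:_(s32), %ovf:_(s1) = G_UADDO %a, %b
// where the two result registers come from getOrCreateVRegs() splitting the
// {i32, i1} aggregate result.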
bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
                                                MachineIRBuilder &MIRBuilder) {
  Register Dst = getOrCreateVReg(CI);
  Register Src0 = getOrCreateVReg(*CI.getOperand(0));
  Register Src1 = getOrCreateVReg(*CI.getOperand(1));
  uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
  MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
  return true;
}
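
// Illustrative example: @llvm.smul.fix.i32(i32 %a, i32 %b, i32 2) performs a
// fixed-point multiply with scale 2 (the full product shifted right by 2) and
// is expected to lower roughly to
//   %r:_(s32) = G_SMULFIX %a, %b, 2
// with the scale carried as an immediate operand.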
unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
  switch (ID) {
  default:
    break;
  case Intrinsic::bswap:
    return TargetOpcode::G_BSWAP;
  case Intrinsic::bitreverse:
    return TargetOpcode::G_BITREVERSE;
  case Intrinsic::fshl:
    return TargetOpcode::G_FSHL;
  case Intrinsic::fshr:
    return TargetOpcode::G_FSHR;
  case Intrinsic::ceil:
    return TargetOpcode::G_FCEIL;
  case Intrinsic::cos:
    return TargetOpcode::G_FCOS;
  case Intrinsic::ctpop:
    return TargetOpcode::G_CTPOP;
  case Intrinsic::exp:
    return TargetOpcode::G_FEXP;
  case Intrinsic::exp2:
    return TargetOpcode::G_FEXP2;
  case Intrinsic::fabs:
    return TargetOpcode::G_FABS;
  case Intrinsic::copysign:
    return TargetOpcode::G_FCOPYSIGN;
  case Intrinsic::minnum:
    return TargetOpcode::G_FMINNUM;
  case Intrinsic::maxnum:
    return TargetOpcode::G_FMAXNUM;
  case Intrinsic::minimum:
    return TargetOpcode::G_FMINIMUM;
  case Intrinsic::maximum:
    return TargetOpcode::G_FMAXIMUM;
  case Intrinsic::canonicalize:
    return TargetOpcode::G_FCANONICALIZE;
  case Intrinsic::floor:
    return TargetOpcode::G_FFLOOR;
  case Intrinsic::fma:
    return TargetOpcode::G_FMA;
  case Intrinsic::log:
    return TargetOpcode::G_FLOG;
  case Intrinsic::log2:
    return TargetOpcode::G_FLOG2;
  case Intrinsic::log10:
    return TargetOpcode::G_FLOG10;
  case Intrinsic::nearbyint:
    return TargetOpcode::G_FNEARBYINT;
  case Intrinsic::pow:
    return TargetOpcode::G_FPOW;
  case Intrinsic::powi:
    return TargetOpcode::G_FPOWI;
  case Intrinsic::rint:
    return TargetOpcode::G_FRINT;
  case Intrinsic::round:
    return TargetOpcode::G_INTRINSIC_ROUND;
  case Intrinsic::roundeven:
    return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
  case Intrinsic::sin:
    return TargetOpcode::G_FSIN;
  case Intrinsic::sqrt:
    return TargetOpcode::G_FSQRT;
  case Intrinsic::trunc:
    return TargetOpcode::G_INTRINSIC_TRUNC;
  case Intrinsic::readcyclecounter:
    return TargetOpcode::G_READCYCLECOUNTER;
  case Intrinsic::ptrmask:
    return TargetOpcode::G_PTRMASK;
  case Intrinsic::lrint:
    return TargetOpcode::G_INTRINSIC_LRINT;
  // FADD/FMUL require checking the FMF, so are handled elsewhere.
  case Intrinsic::vector_reduce_fmin:
    return TargetOpcode::G_VECREDUCE_FMIN;
  case Intrinsic::vector_reduce_fmax:
    return TargetOpcode::G_VECREDUCE_FMAX;
  case Intrinsic::vector_reduce_add:
    return TargetOpcode::G_VECREDUCE_ADD;
  case Intrinsic::vector_reduce_mul:
    return TargetOpcode::G_VECREDUCE_MUL;
  case Intrinsic::vector_reduce_and:
    return TargetOpcode::G_VECREDUCE_AND;
  case Intrinsic::vector_reduce_or:
    return TargetOpcode::G_VECREDUCE_OR;
  case Intrinsic::vector_reduce_xor:
    return TargetOpcode::G_VECREDUCE_XOR;
  case Intrinsic::vector_reduce_smax:
    return TargetOpcode::G_VECREDUCE_SMAX;
  case Intrinsic::vector_reduce_smin:
    return TargetOpcode::G_VECREDUCE_SMIN;
  case Intrinsic::vector_reduce_umax:
    return TargetOpcode::G_VECREDUCE_UMAX;
  case Intrinsic::vector_reduce_umin:
    return TargetOpcode::G_VECREDUCE_UMIN;
  case Intrinsic::lround:
    return TargetOpcode::G_LROUND;
  case Intrinsic::llround:
    return TargetOpcode::G_LLROUND;
  }
  return Intrinsic::not_intrinsic;
}
bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
                                            Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {

  unsigned Op = getSimpleIntrinsicOpcode(ID);

  // Is this a simple intrinsic?
  if (Op == Intrinsic::not_intrinsic)
    return false;

  // Yes. Let's translate it.
  SmallVector<llvm::SrcOp, 4> VRegs;
  for (auto &Arg : CI.args())
    VRegs.push_back(getOrCreateVReg(*Arg));

  MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
                        MachineInstr::copyFlagsFromInstruction(CI));
  return true;
}
// TODO: Include ConstrainedOps.def when all strict instructions are defined.
static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::experimental_constrained_fadd:
    return TargetOpcode::G_STRICT_FADD;
  case Intrinsic::experimental_constrained_fsub:
    return TargetOpcode::G_STRICT_FSUB;
  case Intrinsic::experimental_constrained_fmul:
    return TargetOpcode::G_STRICT_FMUL;
  case Intrinsic::experimental_constrained_fdiv:
    return TargetOpcode::G_STRICT_FDIV;
  case Intrinsic::experimental_constrained_frem:
    return TargetOpcode::G_STRICT_FREM;
  case Intrinsic::experimental_constrained_fma:
    return TargetOpcode::G_STRICT_FMA;
  case Intrinsic::experimental_constrained_sqrt:
    return TargetOpcode::G_STRICT_FSQRT;
  default:
    return 0;
  }
}
bool IRTranslator::translateConstrainedFPIntrinsic(
  const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
  fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();

  unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
  if (!Opcode)
    return false;

  unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
  if (EB == fp::ExceptionBehavior::ebIgnore)
    Flags |= MachineInstr::NoFPExcept;

  SmallVector<llvm::SrcOp, 4> VRegs;
  VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
  if (!FPI.isUnaryOp())
    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
  if (FPI.isTernaryOp())
    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));

  MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
  return true;
}
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                           MachineIRBuilder &MIRBuilder) {
  if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
    if (ORE->enabled()) {
      const Function &F = *MI->getParent()->getParent();
      auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      if (MemoryOpRemark::canHandle(MI, TLI)) {
        MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI);
        R.visit(MI);
      }
    }
  }

  // If this is a simple intrinsic (that is, we just need to add a def of a
  // vreg and uses for each arg operand), then translate it.
  if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
    return true;

  switch (ID) {
  default:
    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    // No stack colouring in O0, discard region information.
    if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
      return true;

    unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
                                                  : TargetOpcode::LIFETIME_END;

    // Get the underlying objects for the location passed on the lifetime
    // marker.
    SmallVector<const Value *, 4> Allocas;
    getUnderlyingObjects(CI.getArgOperand(1), Allocas);

    // Iterate over each underlying object, creating lifetime markers for each
    // static alloca. Quit if we find a non-static alloca.
    for (const Value *V : Allocas) {
      const AllocaInst *AI = dyn_cast<AllocaInst>(V);
      if (!AI)
        continue;

      if (!AI->isStaticAlloca())
        return true;

      MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
    }
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
    assert(DI.getVariable() && "Missing variable");

    const Value *Address = DI.getAddress();
    if (!Address || isa<UndefValue>(Address)) {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
      return true;
    }

    assert(DI.getVariable()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");
    auto AI = dyn_cast<AllocaInst>(Address);
    if (AI && AI->isStaticAlloca()) {
      // Static allocas are tracked at the MF level, no need for DBG_VALUE
      // instructions (in fact, they get ignored if they *do* exist).
      MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
                             getOrCreateFrameIndex(*AI), DI.getDebugLoc());
    } else {
      // A dbg.declare describes the address of a source variable, so lower it
      // into an indirect DBG_VALUE.
      MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
                                       DI.getVariable(), DI.getExpression());
    }
    return true;
  }
  case Intrinsic::dbg_label: {
    const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
    assert(DI.getLabel() && "Missing label");

    assert(DI.getLabel()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");

    MIRBuilder.buildDbgLabel(DI.getLabel());
    return true;
  }
  case Intrinsic::vaend:
    // No target I know of cares about va_end. Certainly no in-tree target
    // does. Simplest intrinsic ever!
    return true;
  case Intrinsic::vastart: {
    auto &TLI = *MF->getSubtarget().getTargetLowering();
    Value *Ptr = CI.getArgOperand(0);
    unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;

    // FIXME: Get alignment
    MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
        .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
                                                MachineMemOperand::MOStore,
                                                ListSize, Align(1)));
    return true;
  }
  case Intrinsic::dbg_value: {
    // This form of DBG_VALUE is target-independent.
    const DbgValueInst &DI = cast<DbgValueInst>(CI);
    const Value *V = DI.getValue();
    assert(DI.getVariable()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");
    if (!V || DI.hasArgList()) {
      // DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
      // terminate any prior location.
      MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
    } else if (const auto *CI = dyn_cast<Constant>(V)) {
      MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
    } else {
      for (Register Reg : getOrCreateVRegs(*V)) {
        // FIXME: This does not handle register-indirect values at offset 0.
        // The direct/indirect thing shouldn't really be handled by something
        // as implicit as reg+noreg vs reg+imm in the first place, but it seems
        // pretty baked in right now.
        MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(),
                                       DI.getExpression());
      }
    }
    return true;
  }
  case Intrinsic::uadd_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
  case Intrinsic::sadd_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
  case Intrinsic::usub_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
  case Intrinsic::ssub_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
  case Intrinsic::umul_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
  case Intrinsic::smul_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
  case Intrinsic::uadd_sat:
    return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
  case Intrinsic::sadd_sat:
    return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
  case Intrinsic::usub_sat:
    return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
  case Intrinsic::ssub_sat:
    return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
  case Intrinsic::ushl_sat:
    return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
  case Intrinsic::sshl_sat:
    return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
  case Intrinsic::umin:
    return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
  case Intrinsic::umax:
    return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
  case Intrinsic::smin:
    return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
  case Intrinsic::smax:
    return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
  case Intrinsic::abs:
    // TODO: Preserve "int min is poison" arg in GMIR?
    return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
  case Intrinsic::smul_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
  case Intrinsic::umul_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
  case Intrinsic::smul_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
  case Intrinsic::umul_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
  case Intrinsic::sdiv_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
  case Intrinsic::udiv_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
  case Intrinsic::sdiv_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
  case Intrinsic::udiv_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
  case Intrinsic::fmuladd: {
    const TargetMachine &TM = MF->getTarget();
    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
    Register Dst = getOrCreateVReg(CI);
    Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
    Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
    Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
        TLI.isFMAFasterThanFMulAndFAdd(*MF,
                                       TLI.getValueType(*DL, CI.getType()))) {
      // TODO: Revisit this to see if we should move this part of the
      // lowering to the combiner.
      MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
                          MachineInstr::copyFlagsFromInstruction(CI));
    } else {
      LLT Ty = getLLTForType(*CI.getType(), *DL);
      auto FMul = MIRBuilder.buildFMul(
          Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
      MIRBuilder.buildFAdd(Dst, FMul, Op2,
                           MachineInstr::copyFlagsFromInstruction(CI));
    }
    return true;
  }
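  // Illustrative note: with FP op fusion allowed and a target where FMA is
  // faster, %r = call float @llvm.fmuladd.f32(%a, %b, %c) becomes a single
  // G_FMA %a, %b, %c above; otherwise it is expanded into G_FMUL %a, %b
  // followed by a G_FADD with %c.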
  case Intrinsic::convert_from_fp16:
    // FIXME: This intrinsic should probably be removed from the IR.
    MIRBuilder.buildFPExt(getOrCreateVReg(CI),
                          getOrCreateVReg(*CI.getArgOperand(0)),
                          MachineInstr::copyFlagsFromInstruction(CI));
    return true;
  case Intrinsic::convert_to_fp16:
    // FIXME: This intrinsic should probably be removed from the IR.
    MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
                            getOrCreateVReg(*CI.getArgOperand(0)),
                            MachineInstr::copyFlagsFromInstruction(CI));
    return true;
  case Intrinsic::memcpy_inline:
    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
  case Intrinsic::memcpy:
    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
  case Intrinsic::memmove:
    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
  case Intrinsic::memset:
    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
  case Intrinsic::eh_typeid_for: {
    GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
    Register Reg = getOrCreateVReg(CI);
    unsigned TypeID = MF->getTypeIDFor(GV);
    MIRBuilder.buildConstant(Reg, TypeID);
    return true;
  }
  case Intrinsic::objectsize:
    llvm_unreachable("llvm.objectsize.* should have been lowered already");

  case Intrinsic::is_constant:
    llvm_unreachable("llvm.is.constant.* should have been lowered already");

  case Intrinsic::stackguard:
    getStackGuard(getOrCreateVReg(CI), MIRBuilder);
    return true;
  case Intrinsic::stackprotector: {
    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
    Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
    getStackGuard(GuardVal, MIRBuilder);

    AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
    int FI = getOrCreateFrameIndex(*Slot);
    MF->getFrameInfo().setStackProtectorIndex(FI);

    MIRBuilder.buildStore(
        GuardVal, getOrCreateVReg(*Slot),
        *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
                                  MachineMemOperand::MOStore |
                                      MachineMemOperand::MOVolatile,
                                  PtrTy, Align(8)));
    return true;
  }
  case Intrinsic::stacksave: {
    // Save the stack pointer to the location provided by the intrinsic.
    Register Reg = getOrCreateVReg(CI);
    Register StackPtr = MF->getSubtarget()
                            .getTargetLowering()
                            ->getStackPointerRegisterToSaveRestore();

    // If the target doesn't specify a stack pointer, then fall back.
    if (!StackPtr)
      return false;

    MIRBuilder.buildCopy(Reg, StackPtr);
    return true;
  }
  case Intrinsic::stackrestore: {
    // Restore the stack pointer from the location provided by the intrinsic.
    Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
    Register StackPtr = MF->getSubtarget()
                            .getTargetLowering()
                            ->getStackPointerRegisterToSaveRestore();

    // If the target doesn't specify a stack pointer, then fall back.
    if (!StackPtr)
      return false;

    MIRBuilder.buildCopy(StackPtr, Reg);
    return true;
  }
  case Intrinsic::cttz:
  case Intrinsic::ctlz: {
    ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
    bool isTrailing = ID == Intrinsic::cttz;
    unsigned Opcode = isTrailing
                          ? Cst->isZero() ? TargetOpcode::G_CTTZ
                                          : TargetOpcode::G_CTTZ_ZERO_UNDEF
                          : Cst->isZero() ? TargetOpcode::G_CTLZ
                                          : TargetOpcode::G_CTLZ_ZERO_UNDEF;
    MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
                          {getOrCreateVReg(*CI.getArgOperand(0))});
    return true;
  }
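  // Illustrative note: @llvm.cttz.i32(%x, i1 true) ("zero is undef") selects
  // G_CTTZ_ZERO_UNDEF above, while @llvm.cttz.i32(%x, i1 false) selects
  // G_CTTZ; the ctlz variants map analogously.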
  case Intrinsic::invariant_start: {
    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
    Register Undef = MRI->createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildUndef(Undef);
    return true;
  }
:
2145 case Intrinsic::expect
:
2146 case Intrinsic::annotation
:
2147 case Intrinsic::ptr_annotation
:
2148 case Intrinsic::launder_invariant_group
:
2149 case Intrinsic::strip_invariant_group
: {
2150 // Drop the intrinsic, but forward the value.
2151 MIRBuilder
.buildCopy(getOrCreateVReg(CI
),
2152 getOrCreateVReg(*CI
.getArgOperand(0)));
2155 case Intrinsic::assume
:
2156 case Intrinsic::experimental_noalias_scope_decl
:
2157 case Intrinsic::var_annotation
:
2158 case Intrinsic::sideeffect
:
2159 // Discard annotate attributes, assumptions, and artificial side-effects.
2161 case Intrinsic::read_volatile_register
:
2162 case Intrinsic::read_register
: {
2163 Value
*Arg
= CI
.getArgOperand(0);
2165 .buildInstr(TargetOpcode::G_READ_REGISTER
, {getOrCreateVReg(CI
)}, {})
2166 .addMetadata(cast
<MDNode
>(cast
<MetadataAsValue
>(Arg
)->getMetadata()));
2169 case Intrinsic::write_register
: {
2170 Value
*Arg
= CI
.getArgOperand(0);
2171 MIRBuilder
.buildInstr(TargetOpcode::G_WRITE_REGISTER
)
2172 .addMetadata(cast
<MDNode
>(cast
<MetadataAsValue
>(Arg
)->getMetadata()))
2173 .addUse(getOrCreateVReg(*CI
.getArgOperand(1)));
  case Intrinsic::localescape: {
    MachineBasicBlock &EntryMBB = MF->front();
    StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());

    // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
    // is the same on all targets.
    for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) {
      Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
      if (isa<ConstantPointerNull>(Arg))
        continue; // Skip null pointers. They represent a hole in index space.

      int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
      MCSymbol *FrameAllocSym =
          MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
                                                                Idx);

      // This should be inserted at the start of the entry block.
      auto LocalEscape =
          MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
              .addSym(FrameAllocSym)
              .addFrameIndex(FI);

      EntryMBB.insert(EntryMBB.begin(), LocalEscape);
    }

    return true;
  }
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul: {
    // Need to check for the reassoc flag to decide whether we want a
    // sequential reduction opcode or not.
    Register Dst = getOrCreateVReg(CI);
    Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
    Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
    unsigned Opc = 0;
    if (!CI.hasAllowReassoc()) {
      // The sequential ordering case.
      Opc = ID == Intrinsic::vector_reduce_fadd
                ? TargetOpcode::G_VECREDUCE_SEQ_FADD
                : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
      MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
                            MachineInstr::copyFlagsFromInstruction(CI));
      return true;
    }
    // We split the operation into a separate G_FADD/G_FMUL + the reduce,
    // since the associativity doesn't matter.
    unsigned ScalarOpc;
    if (ID == Intrinsic::vector_reduce_fadd) {
      Opc = TargetOpcode::G_VECREDUCE_FADD;
      ScalarOpc = TargetOpcode::G_FADD;
    } else {
      Opc = TargetOpcode::G_VECREDUCE_FMUL;
      ScalarOpc = TargetOpcode::G_FMUL;
    }
    LLT DstTy = MRI->getType(Dst);
    auto Rdx = MIRBuilder.buildInstr(
        Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
    MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
                          MachineInstr::copyFlagsFromInstruction(CI));

    return true;
  }
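  // Illustrative note: @llvm.vector.reduce.fadd.v4f32(float %acc, <4 x float> %v)
  // without the 'reassoc' flag must honour source order and becomes
  // G_VECREDUCE_SEQ_FADD %acc, %v; with 'reassoc' it is split roughly into
  // %r = G_VECREDUCE_FADD %v followed by G_FADD %acc, %r, as above.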
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap: {
    StringRef TrapFuncName =
        CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
    if (TrapFuncName.empty())
      break; // Use the default handling.
    CallLowering::CallLoweringInfo Info;
    if (ID == Intrinsic::ubsantrap) {
      Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
                               CI.getArgOperand(0)->getType(), 0});
    }
    Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
    Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
    return CLI->lowerCall(MIRBuilder, Info);
  }
  case Intrinsic::fptrunc_round: {
    unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);

    // Convert the metadata argument to a constant integer
    Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
    Optional<RoundingMode> RoundMode =
        convertStrToRoundingMode(cast<MDString>(MD)->getString());

    // Add the Rounding mode as an integer
    MIRBuilder
        .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
                    {getOrCreateVReg(CI)},
                    {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
        .addImm((int)RoundMode.getValue());

    return true;
  }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
  case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
    return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
                                           MIRBuilder);
  }
  return false;
}
bool IRTranslator::translateInlineAsm(const CallBase &CB,
                                      MachineIRBuilder &MIRBuilder) {

  const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();

  if (!ALI) {
    LLVM_DEBUG(
        dbgs() << "Inline asm lowering is not supported for this target yet\n");
    return false;
  }

  return ALI->lowerInlineAsm(
      MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
}
bool IRTranslator::translateCallBase(const CallBase &CB,
                                     MachineIRBuilder &MIRBuilder) {
  ArrayRef<Register> Res = getOrCreateVRegs(CB);

  SmallVector<ArrayRef<Register>, 8> Args;
  Register SwiftInVReg = 0;
  Register SwiftErrorVReg = 0;
  for (auto &Arg : CB.args()) {
    if (CLI->supportSwiftError() && isSwiftError(Arg)) {
      assert(SwiftInVReg == 0 && "Expected only one swift error argument");
      LLT Ty = getLLTForType(*Arg->getType(), *DL);
      SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
      MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
                                            &CB, &MIRBuilder.getMBB(), Arg));
      Args.emplace_back(makeArrayRef(SwiftInVReg));
      SwiftErrorVReg =
          SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
      continue;
    }
    Args.push_back(getOrCreateVRegs(*Arg));
  }

  if (auto *CI = dyn_cast<CallInst>(&CB)) {
    if (ORE->enabled()) {
      const Function &F = *CI->getParent()->getParent();
      auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      if (MemoryOpRemark::canHandle(CI, TLI)) {
        MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, TLI);
        R.visit(CI);
      }
    }
  }

  // We don't set HasCalls on MFI here yet because call lowering may decide to
  // optimize into tail calls. Instead, we defer that to selection where a final
  // scan is done to check if any instructions are calls.
  bool Success =
      CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
                     [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });

  // Check if we just inserted a tail call.
  if (Success) {
    assert(!HasTailCall && "Can't tail call return twice from block?");
    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
  }

  return Success;
}
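
// Note on the swifterror handling above: the current swifterror value is
// copied into a fresh vreg before the call (the use at the call site), and a
// new vreg is recorded as the value after the call (the def), which is what
// the SwiftError use/def bookkeeping implements.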
2347 bool IRTranslator::translateCall(const User
&U
, MachineIRBuilder
&MIRBuilder
) {
2348 const CallInst
&CI
= cast
<CallInst
>(U
);
2349 auto TII
= MF
->getTarget().getIntrinsicInfo();
2350 const Function
*F
= CI
.getCalledFunction();
2352 // FIXME: support Windows dllimport function calls.
2353 if (F
&& (F
->hasDLLImportStorageClass() ||
2354 (MF
->getTarget().getTargetTriple().isOSWindows() &&
2355 F
->hasExternalWeakLinkage())))
2358 // FIXME: support control flow guard targets.
2359 if (CI
.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget
))
2362 if (CI
.isInlineAsm())
2363 return translateInlineAsm(CI
, MIRBuilder
);
2365 diagnoseDontCall(CI
);
2367 Intrinsic::ID ID
= Intrinsic::not_intrinsic
;
2368 if (F
&& F
->isIntrinsic()) {
2369 ID
= F
->getIntrinsicID();
2370 if (TII
&& ID
== Intrinsic::not_intrinsic
)
2371 ID
= static_cast<Intrinsic::ID
>(TII
->getIntrinsicID(F
));
2374 if (!F
|| !F
->isIntrinsic() || ID
== Intrinsic::not_intrinsic
)
2375 return translateCallBase(CI
, MIRBuilder
);
2377 assert(ID
!= Intrinsic::not_intrinsic
&& "unknown intrinsic");
2379 if (translateKnownIntrinsic(CI
, ID
, MIRBuilder
))
2382 ArrayRef
<Register
> ResultRegs
;
2383 if (!CI
.getType()->isVoidTy())
2384 ResultRegs
= getOrCreateVRegs(CI
);
2386 // Ignore the callsite attributes. Backend code is most likely not expecting
2387 // an intrinsic to sometimes have side effects and sometimes not.
2388 MachineInstrBuilder MIB
=
2389 MIRBuilder
.buildIntrinsic(ID
, ResultRegs
, !F
->doesNotAccessMemory());
2390 if (isa
<FPMathOperator
>(CI
))
2391 MIB
->copyIRFlags(CI
);
2393 for (auto &Arg
: enumerate(CI
.args())) {
2394 // If this is required to be an immediate, don't materialize it in a
2396 if (CI
.paramHasAttr(Arg
.index(), Attribute::ImmArg
)) {
2397 if (ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Arg
.value())) {
2398 // imm arguments are more convenient than cimm (and realistically
2399 // probably sufficient), so use them.
2400 assert(CI
->getBitWidth() <= 64 &&
2401 "large intrinsic immediates not handled");
2402 MIB
.addImm(CI
->getSExtValue());
2404 MIB
.addFPImm(cast
<ConstantFP
>(Arg
.value()));
2406 } else if (auto *MDVal
= dyn_cast
<MetadataAsValue
>(Arg
.value())) {
2407 auto *MD
= MDVal
->getMetadata();
2408 auto *MDN
= dyn_cast
<MDNode
>(MD
);
2410 if (auto *ConstMD
= dyn_cast
<ConstantAsMetadata
>(MD
))
2411 MDN
= MDNode::get(MF
->getFunction().getContext(), ConstMD
);
2412 else // This was probably an MDString.
2415 MIB
.addMetadata(MDN
);
2417 ArrayRef
<Register
> VRegs
= getOrCreateVRegs(*Arg
.value());
2418 if (VRegs
.size() > 1)
2420 MIB
.addUse(VRegs
[0]);
2424 // Add a MachineMemOperand if it is a target mem intrinsic.
2425 const TargetLowering
&TLI
= *MF
->getSubtarget().getTargetLowering();
2426 TargetLowering::IntrinsicInfo Info
;
2427 // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
2428 if (TLI
.getTgtMemIntrinsic(Info
, CI
, *MF
, ID
)) {
2429 Align Alignment
= Info
.align
.getValueOr(
2430 DL
->getABITypeAlign(Info
.memVT
.getTypeForEVT(F
->getContext())));
2431 LLT MemTy
= Info
.memVT
.isSimple()
2432 ? getLLTForMVT(Info
.memVT
.getSimpleVT())
2433 : LLT::scalar(Info
.memVT
.getStoreSizeInBits());
2434 MIB
.addMemOperand(MF
->getMachineMemOperand(MachinePointerInfo(Info
.ptrVal
),
2435 Info
.flags
, MemTy
, Alignment
));
2441 bool IRTranslator::findUnwindDestinations(
2442 const BasicBlock
*EHPadBB
,
2443 BranchProbability Prob
,
2444 SmallVectorImpl
<std::pair
<MachineBasicBlock
*, BranchProbability
>>
2446 EHPersonality Personality
= classifyEHPersonality(
2447 EHPadBB
->getParent()->getFunction().getPersonalityFn());
2448 bool IsMSVCCXX
= Personality
== EHPersonality::MSVC_CXX
;
2449 bool IsCoreCLR
= Personality
== EHPersonality::CoreCLR
;
2450 bool IsWasmCXX
= Personality
== EHPersonality::Wasm_CXX
;
2451 bool IsSEH
= isAsynchronousEHPersonality(Personality
);
2454 // Ignore this for now.
2459 const Instruction
*Pad
= EHPadBB
->getFirstNonPHI();
2460 BasicBlock
*NewEHPadBB
= nullptr;
2461 if (isa
<LandingPadInst
>(Pad
)) {
2462 // Stop on landingpads. They are not funclets.
2463 UnwindDests
.emplace_back(&getMBB(*EHPadBB
), Prob
);
2466 if (isa
<CleanupPadInst
>(Pad
)) {
2467 // Stop on cleanup pads. Cleanups are always funclet entries for all known
2469 UnwindDests
.emplace_back(&getMBB(*EHPadBB
), Prob
);
2470 UnwindDests
.back().first
->setIsEHScopeEntry();
2471 UnwindDests
.back().first
->setIsEHFuncletEntry();
2474 if (auto *CatchSwitch
= dyn_cast
<CatchSwitchInst
>(Pad
)) {
2475 // Add the catchpad handlers to the possible destinations.
2476 for (const BasicBlock
*CatchPadBB
: CatchSwitch
->handlers()) {
2477 UnwindDests
.emplace_back(&getMBB(*CatchPadBB
), Prob
);
2478 // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
2479 if (IsMSVCCXX
|| IsCoreCLR
)
2480 UnwindDests
.back().first
->setIsEHFuncletEntry();
2482 UnwindDests
.back().first
->setIsEHScopeEntry();
2484 NewEHPadBB
= CatchSwitch
->getUnwindDest();
2489 BranchProbabilityInfo
*BPI
= FuncInfo
.BPI
;
2490 if (BPI
&& NewEHPadBB
)
2491 Prob
*= BPI
->getEdgeProbability(EHPadBB
, NewEHPadBB
);
2492 EHPadBB
= NewEHPadBB
;
2497 bool IRTranslator::translateInvoke(const User
&U
,
2498 MachineIRBuilder
&MIRBuilder
) {
2499 const InvokeInst
&I
= cast
<InvokeInst
>(U
);
2500 MCContext
&Context
= MF
->getContext();
2502 const BasicBlock
*ReturnBB
= I
.getSuccessor(0);
2503 const BasicBlock
*EHPadBB
= I
.getSuccessor(1);
2505 const Function
*Fn
= I
.getCalledFunction();
2507 // FIXME: support invoking patchpoint and statepoint intrinsics.
2508 if (Fn
&& Fn
->isIntrinsic())
2511 // FIXME: support whatever these are.
2512 if (I
.countOperandBundlesOfType(LLVMContext::OB_deopt
))
2515 // FIXME: support control flow guard targets.
2516 if (I
.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget
))
2519 // FIXME: support Windows exception handling.
2520 if (!isa
<LandingPadInst
>(EHPadBB
->getFirstNonPHI()))
2523 bool LowerInlineAsm
= I
.isInlineAsm();
2524 bool NeedEHLabel
= true;
2525 // If it can't throw then use a fast-path without emitting EH labels.
2527 NeedEHLabel
= (cast
<InlineAsm
>(I
.getCalledOperand()))->canThrow();
2529 // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
2530 // the region covered by the try.
2531 MCSymbol
*BeginSymbol
= nullptr;
2533 BeginSymbol
= Context
.createTempSymbol();
2534 MIRBuilder
.buildInstr(TargetOpcode::EH_LABEL
).addSym(BeginSymbol
);
2537 if (LowerInlineAsm
) {
2538 if (!translateInlineAsm(I
, MIRBuilder
))
2540 } else if (!translateCallBase(I
, MIRBuilder
))
2543 MCSymbol
*EndSymbol
= nullptr;
2545 EndSymbol
= Context
.createTempSymbol();
2546 MIRBuilder
.buildInstr(TargetOpcode::EH_LABEL
).addSym(EndSymbol
);
2549 SmallVector
<std::pair
<MachineBasicBlock
*, BranchProbability
>, 1> UnwindDests
;
2550 BranchProbabilityInfo
*BPI
= FuncInfo
.BPI
;
2551 MachineBasicBlock
*InvokeMBB
= &MIRBuilder
.getMBB();
2552 BranchProbability EHPadBBProb
=
2553 BPI
? BPI
->getEdgeProbability(InvokeMBB
->getBasicBlock(), EHPadBB
)
2554 : BranchProbability::getZero();
2556 if (!findUnwindDestinations(EHPadBB
, EHPadBBProb
, UnwindDests
))
2559 MachineBasicBlock
&EHPadMBB
= getMBB(*EHPadBB
),
2560 &ReturnMBB
= getMBB(*ReturnBB
);
2561 // Update successor info.
2562 addSuccessorWithProb(InvokeMBB
, &ReturnMBB
);
2563 for (auto &UnwindDest
: UnwindDests
) {
2564 UnwindDest
.first
->setIsEHPad();
2565 addSuccessorWithProb(InvokeMBB
, UnwindDest
.first
, UnwindDest
.second
);
2567 InvokeMBB
->normalizeSuccProbs();
2570 assert(BeginSymbol
&& "Expected a begin symbol!");
2571 assert(EndSymbol
&& "Expected an end symbol!");
2572 MF
->addInvoke(&EHPadMBB
, BeginSymbol
, EndSymbol
);
2575 MIRBuilder
.buildBr(ReturnMBB
);
bool IRTranslator::translateCallBr(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  // FIXME: Implement this.
  return false;
}
2585 bool IRTranslator::translateLandingPad(const User
&U
,
2586 MachineIRBuilder
&MIRBuilder
) {
2587 const LandingPadInst
&LP
= cast
<LandingPadInst
>(U
);
2589 MachineBasicBlock
&MBB
= MIRBuilder
.getMBB();
2593 // If there aren't registers to copy the values into (e.g., during SjLj
2594 // exceptions), then don't bother.
2595 auto &TLI
= *MF
->getSubtarget().getTargetLowering();
2596 const Constant
*PersonalityFn
= MF
->getFunction().getPersonalityFn();
2597 if (TLI
.getExceptionPointerRegister(PersonalityFn
) == 0 &&
2598 TLI
.getExceptionSelectorRegister(PersonalityFn
) == 0)
2601 // If landingpad's return type is token type, we don't create DAG nodes
2602 // for its exception pointer and selector value. The extraction of exception
2603 // pointer or selector value from token type landingpads is not currently
2605 if (LP
.getType()->isTokenTy())
2608 // Add a label to mark the beginning of the landing pad. Deletion of the
2609 // landing pad can thus be detected via the MachineModuleInfo.
2610 MIRBuilder
.buildInstr(TargetOpcode::EH_LABEL
)
2611 .addSym(MF
->addLandingPad(&MBB
));
2613 // If the unwinder does not preserve all registers, ensure that the
2614 // function marks the clobbered registers as used.
2615 const TargetRegisterInfo
&TRI
= *MF
->getSubtarget().getRegisterInfo();
2616 if (auto *RegMask
= TRI
.getCustomEHPadPreservedMask(*MF
))
2617 MF
->getRegInfo().addPhysRegsUsedFromRegMask(RegMask
);
2619 LLT Ty
= getLLTForType(*LP
.getType(), *DL
);
2620 Register Undef
= MRI
->createGenericVirtualRegister(Ty
);
2621 MIRBuilder
.buildUndef(Undef
);
2623 SmallVector
<LLT
, 2> Tys
;
2624 for (Type
*Ty
: cast
<StructType
>(LP
.getType())->elements())
2625 Tys
.push_back(getLLTForType(*Ty
, *DL
));
2626 assert(Tys
.size() == 2 && "Only two-valued landingpads are supported");
2628 // Mark exception register as live in.
2629 Register ExceptionReg
= TLI
.getExceptionPointerRegister(PersonalityFn
);
2633 MBB
.addLiveIn(ExceptionReg
);
2634 ArrayRef
<Register
> ResRegs
= getOrCreateVRegs(LP
);
2635 MIRBuilder
.buildCopy(ResRegs
[0], ExceptionReg
);
2637 Register SelectorReg
= TLI
.getExceptionSelectorRegister(PersonalityFn
);
2641 MBB
.addLiveIn(SelectorReg
);
2642 Register PtrVReg
= MRI
->createGenericVirtualRegister(Tys
[0]);
2643 MIRBuilder
.buildCopy(PtrVReg
, SelectorReg
);
2644 MIRBuilder
.buildCast(ResRegs
[1], PtrVReg
);
2649 bool IRTranslator::translateAlloca(const User
&U
,
2650 MachineIRBuilder
&MIRBuilder
) {
2651 auto &AI
= cast
<AllocaInst
>(U
);
2653 if (AI
.isSwiftError())
2656 if (AI
.isStaticAlloca()) {
2657 Register Res
= getOrCreateVReg(AI
);
2658 int FI
= getOrCreateFrameIndex(AI
);
2659 MIRBuilder
.buildFrameIndex(Res
, FI
);
2663 // FIXME: support stack probing for Windows.
2664 if (MF
->getTarget().getTargetTriple().isOSWindows())
2667 // Now we're in the harder dynamic case.
2668 Register NumElts
= getOrCreateVReg(*AI
.getArraySize());
2669 Type
*IntPtrIRTy
= DL
->getIntPtrType(AI
.getType());
2670 LLT IntPtrTy
= getLLTForType(*IntPtrIRTy
, *DL
);
2671 if (MRI
->getType(NumElts
) != IntPtrTy
) {
2672 Register ExtElts
= MRI
->createGenericVirtualRegister(IntPtrTy
);
2673 MIRBuilder
.buildZExtOrTrunc(ExtElts
, NumElts
);
2677 Type
*Ty
= AI
.getAllocatedType();
2679 Register AllocSize
= MRI
->createGenericVirtualRegister(IntPtrTy
);
2681 getOrCreateVReg(*ConstantInt::get(IntPtrIRTy
, DL
->getTypeAllocSize(Ty
)));
2682 MIRBuilder
.buildMul(AllocSize
, NumElts
, TySize
);
2684 // Round the size of the allocation up to the stack alignment size
2685 // by add SA-1 to the size. This doesn't overflow because we're computing
2686 // an address inside an alloca.
2687 Align StackAlign
= MF
->getSubtarget().getFrameLowering()->getStackAlign();
2688 auto SAMinusOne
= MIRBuilder
.buildConstant(IntPtrTy
, StackAlign
.value() - 1);
2689 auto AllocAdd
= MIRBuilder
.buildAdd(IntPtrTy
, AllocSize
, SAMinusOne
,
2690 MachineInstr::NoUWrap
);
2692 MIRBuilder
.buildConstant(IntPtrTy
, ~(uint64_t)(StackAlign
.value() - 1));
2693 auto AlignedAlloc
= MIRBuilder
.buildAnd(IntPtrTy
, AllocAdd
, AlignCst
);
2695 Align Alignment
= std::max(AI
.getAlign(), DL
->getPrefTypeAlign(Ty
));
2696 if (Alignment
<= StackAlign
)
2697 Alignment
= Align(1);
2698 MIRBuilder
.buildDynStackAlloc(getOrCreateVReg(AI
), AlignedAlloc
, Alignment
);
2700 MF
->getFrameInfo().CreateVariableSizedObject(Alignment
, &AI
);
2701 assert(MF
->getFrameInfo().hasVarSizedObjects());
bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
  // FIXME: We may need more info about the type. Because of how LLT works,
  // we're completely discarding the i64/double distinction here (amongst
  // others). Fortunately the ABIs I know of where that matters don't use va_arg
  // anyway but that's not guaranteed.
  MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
                        {getOrCreateVReg(*U.getOperand(0)),
                         DL->getABITypeAlign(U.getType()).value()});
  return true;
}
2716 bool IRTranslator::translateUnreachable(const User
&U
, MachineIRBuilder
&MIRBuilder
) {
2717 if (!MF
->getTarget().Options
.TrapUnreachable
)
2720 auto &UI
= cast
<UnreachableInst
>(U
);
2721 // We may be able to ignore unreachable behind a noreturn call.
2722 if (MF
->getTarget().Options
.NoTrapAfterNoreturn
) {
2723 const BasicBlock
&BB
= *UI
.getParent();
2724 if (&UI
!= &BB
.front()) {
2725 BasicBlock::const_iterator PredI
=
2726 std::prev(BasicBlock::const_iterator(UI
));
2727 if (const CallInst
*Call
= dyn_cast
<CallInst
>(&*PredI
)) {
2728 if (Call
->doesNotReturn())
2734 MIRBuilder
.buildIntrinsic(Intrinsic::trap
, ArrayRef
<Register
>(), true);
2738 bool IRTranslator::translateInsertElement(const User
&U
,
2739 MachineIRBuilder
&MIRBuilder
) {
2740 // If it is a <1 x Ty> vector, use the scalar as it is
2741 // not a legal vector type in LLT.
2742 if (cast
<FixedVectorType
>(U
.getType())->getNumElements() == 1)
2743 return translateCopy(U
, *U
.getOperand(1), MIRBuilder
);
2745 Register Res
= getOrCreateVReg(U
);
2746 Register Val
= getOrCreateVReg(*U
.getOperand(0));
2747 Register Elt
= getOrCreateVReg(*U
.getOperand(1));
2748 Register Idx
= getOrCreateVReg(*U
.getOperand(2));
2749 MIRBuilder
.buildInsertVectorElement(Res
, Val
, Elt
, Idx
);
2753 bool IRTranslator::translateExtractElement(const User
&U
,
2754 MachineIRBuilder
&MIRBuilder
) {
2755 // If it is a <1 x Ty> vector, use the scalar as it is
2756 // not a legal vector type in LLT.
2757 if (cast
<FixedVectorType
>(U
.getOperand(0)->getType())->getNumElements() == 1)
2758 return translateCopy(U
, *U
.getOperand(0), MIRBuilder
);
2760 Register Res
= getOrCreateVReg(U
);
2761 Register Val
= getOrCreateVReg(*U
.getOperand(0));
2762 const auto &TLI
= *MF
->getSubtarget().getTargetLowering();
2763 unsigned PreferredVecIdxWidth
= TLI
.getVectorIdxTy(*DL
).getSizeInBits();
2765 if (auto *CI
= dyn_cast
<ConstantInt
>(U
.getOperand(1))) {
2766 if (CI
->getBitWidth() != PreferredVecIdxWidth
) {
2767 APInt NewIdx
= CI
->getValue().sextOrTrunc(PreferredVecIdxWidth
);
2768 auto *NewIdxCI
= ConstantInt::get(CI
->getContext(), NewIdx
);
2769 Idx
= getOrCreateVReg(*NewIdxCI
);
2773 Idx
= getOrCreateVReg(*U
.getOperand(1));
2774 if (MRI
->getType(Idx
).getSizeInBits() != PreferredVecIdxWidth
) {
2775 const LLT VecIdxTy
= LLT::scalar(PreferredVecIdxWidth
);
2776 Idx
= MIRBuilder
.buildSExtOrTrunc(VecIdxTy
, Idx
).getReg(0);
2778 MIRBuilder
.buildExtractVectorElement(Res
, Val
, Idx
);
2782 bool IRTranslator::translateShuffleVector(const User
&U
,
2783 MachineIRBuilder
&MIRBuilder
) {
2785 if (auto *SVI
= dyn_cast
<ShuffleVectorInst
>(&U
))
2786 Mask
= SVI
->getShuffleMask();
2788 Mask
= cast
<ConstantExpr
>(U
).getShuffleMask();
2789 ArrayRef
<int> MaskAlloc
= MF
->allocateShuffleMask(Mask
);
2791 .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR
, {getOrCreateVReg(U
)},
2792 {getOrCreateVReg(*U
.getOperand(0)),
2793 getOrCreateVReg(*U
.getOperand(1))})
2794 .addShuffleMask(MaskAlloc
);
bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
  const PHINode &PI = cast<PHINode>(U);

  SmallVector<MachineInstr *, 4> Insts;
  for (auto Reg : getOrCreateVRegs(PI)) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
    Insts.push_back(MIB.getInstr());
  }

  PendingPHIs.emplace_back(&PI, std::move(Insts));
  return true;
}
2811 bool IRTranslator::translateAtomicCmpXchg(const User
&U
,
2812 MachineIRBuilder
&MIRBuilder
) {
2813 const AtomicCmpXchgInst
&I
= cast
<AtomicCmpXchgInst
>(U
);
2815 auto &TLI
= *MF
->getSubtarget().getTargetLowering();
2816 auto Flags
= TLI
.getAtomicMemOperandFlags(I
, *DL
);
2818 auto Res
= getOrCreateVRegs(I
);
2819 Register OldValRes
= Res
[0];
2820 Register SuccessRes
= Res
[1];
2821 Register Addr
= getOrCreateVReg(*I
.getPointerOperand());
2822 Register Cmp
= getOrCreateVReg(*I
.getCompareOperand());
2823 Register NewVal
= getOrCreateVReg(*I
.getNewValOperand());
2825 MIRBuilder
.buildAtomicCmpXchgWithSuccess(
2826 OldValRes
, SuccessRes
, Addr
, Cmp
, NewVal
,
2827 *MF
->getMachineMemOperand(
2828 MachinePointerInfo(I
.getPointerOperand()), Flags
, MRI
->getType(Cmp
),
2829 getMemOpAlign(I
), I
.getAAMetadata(), nullptr, I
.getSyncScopeID(),
2830 I
.getSuccessOrdering(), I
.getFailureOrdering()));
2834 bool IRTranslator::translateAtomicRMW(const User
&U
,
2835 MachineIRBuilder
&MIRBuilder
) {
2836 const AtomicRMWInst
&I
= cast
<AtomicRMWInst
>(U
);
2837 auto &TLI
= *MF
->getSubtarget().getTargetLowering();
2838 auto Flags
= TLI
.getAtomicMemOperandFlags(I
, *DL
);
2840 Register Res
= getOrCreateVReg(I
);
2841 Register Addr
= getOrCreateVReg(*I
.getPointerOperand());
2842 Register Val
= getOrCreateVReg(*I
.getValOperand());
2844 unsigned Opcode
= 0;
2845 switch (I
.getOperation()) {
2848 case AtomicRMWInst::Xchg
:
2849 Opcode
= TargetOpcode::G_ATOMICRMW_XCHG
;
2851 case AtomicRMWInst::Add
:
2852 Opcode
= TargetOpcode::G_ATOMICRMW_ADD
;
2854 case AtomicRMWInst::Sub
:
2855 Opcode
= TargetOpcode::G_ATOMICRMW_SUB
;
2857 case AtomicRMWInst::And
:
2858 Opcode
= TargetOpcode::G_ATOMICRMW_AND
;
2860 case AtomicRMWInst::Nand
:
2861 Opcode
= TargetOpcode::G_ATOMICRMW_NAND
;
2863 case AtomicRMWInst::Or
:
2864 Opcode
= TargetOpcode::G_ATOMICRMW_OR
;
2866 case AtomicRMWInst::Xor
:
2867 Opcode
= TargetOpcode::G_ATOMICRMW_XOR
;
2869 case AtomicRMWInst::Max
:
2870 Opcode
= TargetOpcode::G_ATOMICRMW_MAX
;
2872 case AtomicRMWInst::Min
:
2873 Opcode
= TargetOpcode::G_ATOMICRMW_MIN
;
2875 case AtomicRMWInst::UMax
:
2876 Opcode
= TargetOpcode::G_ATOMICRMW_UMAX
;
2878 case AtomicRMWInst::UMin
:
2879 Opcode
= TargetOpcode::G_ATOMICRMW_UMIN
;
2881 case AtomicRMWInst::FAdd
:
2882 Opcode
= TargetOpcode::G_ATOMICRMW_FADD
;
2884 case AtomicRMWInst::FSub
:
2885 Opcode
= TargetOpcode::G_ATOMICRMW_FSUB
;
2889 MIRBuilder
.buildAtomicRMW(
2890 Opcode
, Res
, Addr
, Val
,
2891 *MF
->getMachineMemOperand(MachinePointerInfo(I
.getPointerOperand()),
2892 Flags
, MRI
->getType(Val
), getMemOpAlign(I
),
2893 I
.getAAMetadata(), nullptr, I
.getSyncScopeID(),
bool IRTranslator::translateFence(const User &U,
                                  MachineIRBuilder &MIRBuilder) {
  const FenceInst &Fence = cast<FenceInst>(U);
  MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
                        Fence.getSyncScopeID());
  return true;
}
bool IRTranslator::translateFreeze(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  const ArrayRef<Register> DstRegs = getOrCreateVRegs(U);
  const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0));

  assert(DstRegs.size() == SrcRegs.size() &&
         "Freeze with different source and destination type?");

  for (unsigned I = 0; I < DstRegs.size(); ++I) {
    MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]);
  }

  return true;
}
2921 void IRTranslator::finishPendingPhis() {
2923 DILocationVerifier Verifier
;
2924 GISelObserverWrapper
WrapperObserver(&Verifier
);
2925 RAIIDelegateInstaller
DelInstall(*MF
, &WrapperObserver
);
2926 #endif // ifndef NDEBUG
2927 for (auto &Phi
: PendingPHIs
) {
2928 const PHINode
*PI
= Phi
.first
;
2929 ArrayRef
<MachineInstr
*> ComponentPHIs
= Phi
.second
;
2930 MachineBasicBlock
*PhiMBB
= ComponentPHIs
[0]->getParent();
2931 EntryBuilder
->setDebugLoc(PI
->getDebugLoc());
2933 Verifier
.setCurrentInst(PI
);
2934 #endif // ifndef NDEBUG
2936 SmallSet
<const MachineBasicBlock
*, 16> SeenPreds
;
2937 for (unsigned i
= 0; i
< PI
->getNumIncomingValues(); ++i
) {
2938 auto IRPred
= PI
->getIncomingBlock(i
);
2939 ArrayRef
<Register
> ValRegs
= getOrCreateVRegs(*PI
->getIncomingValue(i
));
2940 for (auto Pred
: getMachinePredBBs({IRPred
, PI
->getParent()})) {
2941 if (SeenPreds
.count(Pred
) || !PhiMBB
->isPredecessor(Pred
))
2943 SeenPreds
.insert(Pred
);
2944 for (unsigned j
= 0; j
< ValRegs
.size(); ++j
) {
2945 MachineInstrBuilder
MIB(*MF
, ComponentPHIs
[j
]);
2946 MIB
.addUse(ValRegs
[j
]);
bool IRTranslator::valueIsSplit(const Value &V,
                                SmallVectorImpl<uint64_t> *Offsets) {
  SmallVector<LLT, 4> SplitTys;
  if (Offsets && !Offsets->empty())
    Offsets->clear();
  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
  return SplitTys.size() > 1;
}
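
// Illustrative example: a value of type {i64, i32} is computed as two LLTs
// (s64 at bit offset 0 and s32 at bit offset 64), so valueIsSplit() returns
// true and the value is tracked as two vregs; a plain i32 yields a single LLT
// and returns false.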
bool IRTranslator::translate(const Instruction &Inst) {
  CurBuilder->setDebugLoc(Inst.getDebugLoc());

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  if (TLI.fallBackToDAGISel(Inst))
    return false;

  switch (Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
  case Instruction::OPCODE:                                                    \
    return translate##OPCODE(Inst, *CurBuilder.get());
#include "llvm/IR/Instruction.def"
  default:
    return false;
  }
}
2980 bool IRTranslator::translate(const Constant
&C
, Register Reg
) {
2981 // We only emit constants into the entry block from here. To prevent jumpy
2982 // debug behaviour set the line to 0.
2983 if (auto CurrInstDL
= CurBuilder
->getDL())
2984 EntryBuilder
->setDebugLoc(DILocation::get(C
.getContext(), 0, 0,
2985 CurrInstDL
.getScope(),
2986 CurrInstDL
.getInlinedAt()));
2988 if (auto CI
= dyn_cast
<ConstantInt
>(&C
))
2989 EntryBuilder
->buildConstant(Reg
, *CI
);
2990 else if (auto CF
= dyn_cast
<ConstantFP
>(&C
))
2991 EntryBuilder
->buildFConstant(Reg
, *CF
);
2992 else if (isa
<UndefValue
>(C
))
2993 EntryBuilder
->buildUndef(Reg
);
2994 else if (isa
<ConstantPointerNull
>(C
))
2995 EntryBuilder
->buildConstant(Reg
, 0);
2996 else if (auto GV
= dyn_cast
<GlobalValue
>(&C
))
2997 EntryBuilder
->buildGlobalValue(Reg
, GV
);
2998 else if (auto CAZ
= dyn_cast
<ConstantAggregateZero
>(&C
)) {
2999 if (!isa
<FixedVectorType
>(CAZ
->getType()))
3001 // Return the scalar if it is a <1 x Ty> vector.
3002 unsigned NumElts
= CAZ
->getElementCount().getFixedValue();
3004 return translateCopy(C
, *CAZ
->getElementValue(0u), *EntryBuilder
.get());
3005 SmallVector
<Register
, 4> Ops
;
3006 for (unsigned I
= 0; I
< NumElts
; ++I
) {
3007 Constant
&Elt
= *CAZ
->getElementValue(I
);
3008 Ops
.push_back(getOrCreateVReg(Elt
));
3010 EntryBuilder
->buildBuildVector(Reg
, Ops
);
3011 } else if (auto CV
= dyn_cast
<ConstantDataVector
>(&C
)) {
3012 // Return the scalar if it is a <1 x Ty> vector.
3013 if (CV
->getNumElements() == 1)
3014 return translateCopy(C
, *CV
->getElementAsConstant(0),
3015 *EntryBuilder
.get());
3016 SmallVector
<Register
, 4> Ops
;
3017 for (unsigned i
= 0; i
< CV
->getNumElements(); ++i
) {
3018 Constant
&Elt
= *CV
->getElementAsConstant(i
);
3019 Ops
.push_back(getOrCreateVReg(Elt
));
3021 EntryBuilder
->buildBuildVector(Reg
, Ops
);
3022 } else if (auto CE
= dyn_cast
<ConstantExpr
>(&C
)) {
3023 switch(CE
->getOpcode()) {
3024 #define HANDLE_INST(NUM, OPCODE, CLASS) \
3025 case Instruction::OPCODE: \
3026 return translate##OPCODE(*CE, *EntryBuilder.get());
3027 #include "llvm/IR/Instruction.def"
3031 } else if (auto CV
= dyn_cast
<ConstantVector
>(&C
)) {
3032 if (CV
->getNumOperands() == 1)
3033 return translateCopy(C
, *CV
->getOperand(0), *EntryBuilder
.get());
3034 SmallVector
<Register
, 4> Ops
;
3035 for (unsigned i
= 0; i
< CV
->getNumOperands(); ++i
) {
3036 Ops
.push_back(getOrCreateVReg(*CV
->getOperand(i
)));
3038 EntryBuilder
->buildBuildVector(Reg
, Ops
);
3039 } else if (auto *BA
= dyn_cast
<BlockAddress
>(&C
)) {
3040 EntryBuilder
->buildBlockAddress(Reg
, BA
);
3047 bool IRTranslator::finalizeBasicBlock(const BasicBlock
&BB
,
3048 MachineBasicBlock
&MBB
) {
3049 for (auto &BTB
: SL
->BitTestCases
) {
3050 // Emit header first, if it wasn't already emitted.
3052 emitBitTestHeader(BTB
, BTB
.Parent
);
3054 BranchProbability UnhandledProb
= BTB
.Prob
;
3055 for (unsigned j
= 0, ej
= BTB
.Cases
.size(); j
!= ej
; ++j
) {
3056 UnhandledProb
-= BTB
.Cases
[j
].ExtraProb
;
3057 // Set the current basic block to the mbb we wish to insert the code into
3058 MachineBasicBlock
*MBB
= BTB
.Cases
[j
].ThisBB
;
3059 // If all cases cover a contiguous range, it is not necessary to jump to
3060 // the default block after the last bit test fails. This is because the
3061 // range check during bit test header creation has guaranteed that every
3062 // case here doesn't go outside the range. In this case, there is no need
3063 // to perform the last bit test, as it will always be true. Instead, make
3064 // the second-to-last bit-test fall through to the target of the last bit
3065 // test, and delete the last bit test.
3067 MachineBasicBlock
*NextMBB
;
3068 if ((BTB
.ContiguousRange
|| BTB
.FallthroughUnreachable
) && j
+ 2 == ej
) {
3069 // Second-to-last bit-test with contiguous range: fall through to the
3070 // target of the final bit test.
3071 NextMBB
= BTB
.Cases
[j
+ 1].TargetBB
;
3072 } else if (j
+ 1 == ej
) {
3073 // For the last bit test, fall through to Default.
3074 NextMBB
= BTB
.Default
;
3076 // Otherwise, fall through to the next bit test.
3077 NextMBB
= BTB
.Cases
[j
+ 1].ThisBB
;
3080 emitBitTestCase(BTB
, NextMBB
, UnhandledProb
, BTB
.Reg
, BTB
.Cases
[j
], MBB
);
3082 if ((BTB
.ContiguousRange
|| BTB
.FallthroughUnreachable
) && j
+ 2 == ej
) {
3083 // We need to record the replacement phi edge here that normally
3084 // happens in emitBitTestCase before we delete the case, otherwise the
3085 // phi edge will be lost.
3086 addMachineCFGPred({BTB
.Parent
->getBasicBlock(),
3087 BTB
.Cases
[ej
- 1].TargetBB
->getBasicBlock()},
3089 // Since we're not going to use the final bit test, remove it.
3090 BTB
.Cases
.pop_back();
3094 // This is "default" BB. We have two jumps to it. From "header" BB and from
3095 // last "case" BB, unless the latter was skipped.
3096 CFGEdge HeaderToDefaultEdge
= {BTB
.Parent
->getBasicBlock(),
3097 BTB
.Default
->getBasicBlock()};
3098 addMachineCFGPred(HeaderToDefaultEdge
, BTB
.Parent
);
3099 if (!BTB
.ContiguousRange
) {
3100 addMachineCFGPred(HeaderToDefaultEdge
, BTB
.Cases
.back().ThisBB
);
3103 SL
->BitTestCases
.clear();
3105 for (auto &JTCase
: SL
->JTCases
) {
3106 // Emit header first, if it wasn't already emitted.
3107 if (!JTCase
.first
.Emitted
)
3108 emitJumpTableHeader(JTCase
.second
, JTCase
.first
, JTCase
.first
.HeaderBB
);
3110 emitJumpTable(JTCase
.second
, JTCase
.second
.MBB
);
3112 SL
->JTCases
.clear();
3114 for (auto &SwCase
: SL
->SwitchCases
)
3115 emitSwitchCase(SwCase
, &CurBuilder
->getMBB(), *CurBuilder
);
3116 SL
->SwitchCases
.clear();
3118 // Check if we need to generate stack-protector guard checks.
3119 StackProtector
&SP
= getAnalysis
<StackProtector
>();
3120 if (SP
.shouldEmitSDCheck(BB
)) {
3121 const TargetLowering
&TLI
= *MF
->getSubtarget().getTargetLowering();
3122 bool FunctionBasedInstrumentation
=
3123 TLI
.getSSPStackGuardCheck(*MF
->getFunction().getParent());
3124 SPDescriptor
.initialize(&BB
, &MBB
, FunctionBasedInstrumentation
);
3126 // Handle stack protector.
3127 if (SPDescriptor
.shouldEmitFunctionBasedCheckStackProtector()) {
3128 LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n");
3130 } else if (SPDescriptor
.shouldEmitStackProtector()) {
3131 MachineBasicBlock
*ParentMBB
= SPDescriptor
.getParentMBB();
3132 MachineBasicBlock
*SuccessMBB
= SPDescriptor
.getSuccessMBB();
3134 // Find the split point to split the parent mbb. At the same time copy all
3135 // physical registers used in the tail of parent mbb into virtual registers
3136 // before the split point and back into physical registers after the split
3137 // point. This prevents us needing to deal with Live-ins and many other
3138 // register allocation issues caused by us splitting the parent mbb. The
3139 // register allocator will clean up said virtual copies later on.
3140 MachineBasicBlock::iterator SplitPoint
= findSplitPointForStackProtector(
3141 ParentMBB
, *MF
->getSubtarget().getInstrInfo());
3143 // Splice the terminator of ParentMBB into SuccessMBB.
3144 SuccessMBB
->splice(SuccessMBB
->end(), ParentMBB
, SplitPoint
,
3147 // Add compare/jump on neq/jump to the parent BB.
3148 if (!emitSPDescriptorParent(SPDescriptor
, ParentMBB
))
3151 // CodeGen Failure MBB if we have not codegened it yet.
3152 MachineBasicBlock
*FailureMBB
= SPDescriptor
.getFailureMBB();
3153 if (FailureMBB
->empty()) {
3154 if (!emitSPDescriptorFailure(SPDescriptor
, FailureMBB
))
3158 // Clear the Per-BB State.
3159 SPDescriptor
.resetPerBBState();
3164 bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor
&SPD
,
3165 MachineBasicBlock
*ParentBB
) {
3166 CurBuilder
->setInsertPt(*ParentBB
, ParentBB
->end());
3167 // First create the loads to the guard/stack slot for the comparison.
3168 const TargetLowering
&TLI
= *MF
->getSubtarget().getTargetLowering();
3169 Type
*PtrIRTy
= Type::getInt8PtrTy(MF
->getFunction().getContext());
3170 const LLT PtrTy
= getLLTForType(*PtrIRTy
, *DL
);
3171 LLT PtrMemTy
= getLLTForMVT(TLI
.getPointerMemTy(*DL
));
3173 MachineFrameInfo
&MFI
= ParentBB
->getParent()->getFrameInfo();
3174 int FI
= MFI
.getStackProtectorIndex();
3177 Register StackSlotPtr
= CurBuilder
->buildFrameIndex(PtrTy
, FI
).getReg(0);
3178 const Module
&M
= *ParentBB
->getParent()->getFunction().getParent();
3179 Align Align
= DL
->getPrefTypeAlign(Type::getInt8PtrTy(M
.getContext()));
3181 // Generate code to load the content of the guard slot.
3184 ->buildLoad(PtrMemTy
, StackSlotPtr
,
3185 MachinePointerInfo::getFixedStack(*MF
, FI
), Align
,
3186 MachineMemOperand::MOLoad
| MachineMemOperand::MOVolatile
)
3189 if (TLI
.useStackGuardXorFP()) {
3190 LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");

  // Retrieve guard check function, nullptr if instrumentation is inlined.
  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
    // This path is currently untestable on GlobalISel, since the only platform
    // that needs this seems to be Windows, and we fall back on that currently.
    // The code still lives here in case that changes.
    // Silence warning about unused variable until the code below that uses
    // 'GuardCheckFn' is enabled.
    (void)GuardCheckFn;
    return false;
#if 0
    // The target provides a guard check function to validate the guard value.
    // Generate a call to that function with the content of the guard slot as
    // argument.
    FunctionType *FnTy = GuardCheckFn->getFunctionType();
    assert(FnTy->getNumParams() == 1 && "Invalid function signature");
    ISD::ArgFlagsTy Flags;
    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
      Flags.setInReg();
    CallLowering::ArgInfo GuardArgInfo(
        {GuardVal, FnTy->getParamType(0), {Flags}});

    CallLowering::CallLoweringInfo Info;
    Info.OrigArgs.push_back(GuardArgInfo);
    Info.CallConv = GuardCheckFn->getCallingConv();
    Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0);
    Info.OrigRet = {Register(), FnTy->getReturnType()};
    if (!CLI->lowerCall(MIRBuilder, Info)) {
      LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n");
      return false;
    }
    return true;
#endif
  }

  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
  // Otherwise, emit a volatile load to retrieve the stack guard value.
  if (TLI.useLoadStackGuardNode()) {
    Guard =
        MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
    getStackGuard(Guard, *CurBuilder);
  } else {
    // TODO: test using android subtarget when we support @llvm.thread.pointer.
    const Value *IRGuard = TLI.getSDagStackGuard(M);
    Register GuardPtr = getOrCreateVReg(*IRGuard);

    Guard = CurBuilder
                ->buildLoad(PtrMemTy, GuardPtr,
                            MachinePointerInfo::getFixedStack(*MF, FI), Align,
                            MachineMemOperand::MOLoad |
                                MachineMemOperand::MOVolatile)
                .getReg(0);
  }

  // Perform the comparison.
  auto Cmp =
      CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal);
  // If the guard/stackslot do not equal, branch to failure MBB.
  CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
  // Otherwise branch to success MBB.
  CurBuilder->buildBr(*SPD.getSuccessMBB());
  return true;
}
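
// The failure block filled in below simply lowers a call to the
// stack-protector-failure libcall (typically __stack_chk_fail), and bails
// out on targets (PS4, WebAssembly) that additionally require an explicit
// trap, which is not implemented here yet.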
bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
                                           MachineBasicBlock *FailureBB) {
  CurBuilder->setInsertPt(*FailureBB, FailureBB->end());
  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();

  const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL;
  const char *Name = TLI.getLibcallName(Libcall);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()),
                  0};
  if (!CLI->lowerCall(*CurBuilder, Info)) {
    LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n");
    return false;
  }

  // On PS4, the "return address" must still be within the calling function,
  // even if it's at the very end, so emit an explicit TRAP here.
  // Passing 'true' for doesNotReturn above won't generate the trap for us.
  // WebAssembly needs an unreachable instruction after a non-returning call,
  // because the function return type can be different from __stack_chk_fail's
  // return type (void).
  const TargetMachine &TM = MF->getTarget();
  if (TM.getTargetTriple().isPS4() || TM.getTargetTriple().isWasm()) {
    LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
    return false;
  }
  return true;
}

void IRTranslator::finalizeFunction() {
  // Release the memory used by the different maps we
  // needed during the translation.
  PendingPHIs.clear();
  VMap.reset();
  FrameIndices.clear();
  MachinePreds.clear();
  // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
  // to avoid accessing free'd memory (in runOnMachineFunction) and to avoid
  // destroying it twice (in ~IRTranslator() and ~LLVMContext())
  EntryBuilder.reset();
  CurBuilder.reset();
  FuncInfo.clear();
  SPDescriptor.resetPerFunctionState();
}

/// Returns true if a BasicBlock \p BB within a variadic function contains a
/// variadic musttail call.
static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
  if (!IsVarArg)
    return false;

  // Walk the block backwards, because tail calls usually only appear at the
  // end of a block.
  return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) {
    const auto *CI = dyn_cast<CallInst>(&I);
    return CI && CI->isMustTailCall();
  });
}
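
// Example of the pattern this helper detects (illustrative IR only; the
// function names are hypothetical):
//
//   define void @forward(i8* %fmt, ...) {
//     musttail call void (i8*, ...) @vprintf_like(i8* %fmt, ...)
//     ret void
//   }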

bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
  MF = &CurMF;
  const Function &F = MF->getFunction();
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  // Set the CSEConfig and run the analysis.
  GISelCSEInfo *CSEInfo = nullptr;
  TPC = &getAnalysis<TargetPassConfig>();
  bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
                       ? EnableCSEInIRTranslator
                       : TPC->isGISelCSEEnabled();

  if (EnableCSE) {
    EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CSEInfo = &Wrapper.get(TPC->getCSEConfig());
    EntryBuilder->setCSEInfo(CSEInfo);
    CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CurBuilder->setCSEInfo(CSEInfo);
  } else {
    EntryBuilder = std::make_unique<MachineIRBuilder>();
    CurBuilder = std::make_unique<MachineIRBuilder>();
  }
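  // Note: with CSE enabled above, both builders share the same GISelCSEInfo,
  // so repeated requests for e.g. an identical G_CONSTANT are folded to a
  // single instruction instead of being emitted again.
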
  CLI = MF->getSubtarget().getCallLowering();
  CurBuilder->setMF(*MF);
  EntryBuilder->setMF(*MF);
  MRI = &MF->getRegInfo();
  DL = &F.getParent()->getDataLayout();
  ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
  const TargetMachine &TM = MF->getTarget();
  TM.resetTargetOptions(F);
  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
  FuncInfo.MF = MF;
  if (EnableOpts)
    FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
  else
    FuncInfo.BPI = nullptr;

  FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);

  const auto &TLI = *MF->getSubtarget().getTargetLowering();

  SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
  SL->init(TLI, TM, *DL);

  assert(PendingPHIs.empty() && "stale PHIs");

  // Targets which want to use big endian can enable it using
  // enableBigEndian()
  if (!DL->isLittleEndian() && !CLI->enableBigEndian()) {
    // Currently we don't properly handle big endian code.
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to translate in big endian mode";
    reportTranslationError(*MF, *TPC, *ORE, R);
  }

  // Release the per-function state when we return, whether we succeeded or not.
  auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });

  // Setup a separate basic-block for the arguments and constants
  MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
  MF->push_back(EntryBB);
  EntryBuilder->setMBB(*EntryBB);

  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
  SwiftError.setFunction(CurMF);
  SwiftError.createEntriesInEntryBlock(DbgLoc);

  bool IsVarArg = F.isVarArg();
  bool HasMustTailInVarArgFn = false;

  // Create all blocks, in IR order, to preserve the layout.
  for (const BasicBlock &BB : F) {
    auto *&MBB = BBToMBB[&BB];

    MBB = MF->CreateMachineBasicBlock(&BB);
    MF->push_back(MBB);

    if (BB.hasAddressTaken())
      MBB->setHasAddressTaken();

    if (!HasMustTailInVarArgFn)
      HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
  }

  MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
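
  // Creating every MachineBasicBlock up front (in the loop above) means
  // branch translation can always resolve its successors via getMBB(), even
  // when the target block has not been translated yet.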

  // Make our arguments/constants entry block fallthrough to the IR entry block.
  EntryBB->addSuccessor(&getMBB(F.front()));

  if (CLI->fallBackToDAGISel(*MF)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower function: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Lower the actual args into this basic block.
  SmallVector<ArrayRef<Register>, 8> VRegArgs;
  for (const Argument &Arg : F.args()) {
    if (DL->getTypeStoreSize(Arg.getType()).isZero())
      continue; // Don't handle zero sized types.
    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
    VRegArgs.push_back(VRegs);

    if (Arg.hasSwiftErrorAttr()) {
      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
    }
  }

  if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }
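
  // On success, the argument values now live in the vregs recorded in
  // VRegArgs: the call lowering has emitted the copies from the ABI-assigned
  // registers and stack slots into the arguments/constants entry block.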

  // Need to visit defs before uses when translating instructions.
  GISelObserverWrapper WrapperObserver;
  if (EnableCSE && CSEInfo)
    WrapperObserver.addObserver(CSEInfo);
  {
    ReversePostOrderTraversal<const Function *> RPOT(&F);
#ifndef NDEBUG
    DILocationVerifier Verifier;
    WrapperObserver.addObserver(&Verifier);
#endif // ifndef NDEBUG
    RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
    RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
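    // The RAII installers register WrapperObserver with the MachineFunction
    // for the duration of this scope, so every instruction created or erased
    // during translation is reported to the CSE info (and, in debug builds,
    // to the DILocation verifier).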
    for (const BasicBlock *BB : RPOT) {
      MachineBasicBlock &MBB = getMBB(*BB);
      // Set the insertion point of all the following translations to
      // the end of this basic block.
      CurBuilder->setMBB(MBB);
      HasTailCall = false;
      for (const Instruction &Inst : *BB) {
        // If we translated a tail call in the last step, then we know
        // everything after the call is either a return, or something that is
        // handled by the call itself. (E.g. a lifetime marker or assume
        // intrinsic.) In this case, we should stop translating the block and
        // move on.
        if (HasTailCall)
          break;
#ifndef NDEBUG
        Verifier.setCurrentInst(&Inst);
#endif // ifndef NDEBUG
        if (translate(Inst))
          continue;

        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                   Inst.getDebugLoc(), BB);
        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);

        if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
          std::string InstStrStorage;
          raw_string_ostream InstStr(InstStrStorage);
          InstStr << Inst;

          R << ": '" << InstStr.str() << "'";
        }

        reportTranslationError(*MF, *TPC, *ORE, R);
        return false;
      }

      if (!finalizeBasicBlock(*BB, MBB))
        return false;
    }
#ifndef NDEBUG
    WrapperObserver.removeObserver(&Verifier);
#endif
  }
  finishPendingPhis();

  SwiftError.propagateVRegs();

  // Merge the argument lowering and constants block with its single
  // successor, the LLVM-IR entry block. We want the basic block to
  // be maximal.
  assert(EntryBB->succ_size() == 1 &&
         "Custom BB used for lowering should have only one successor");
  // Get the successor of the current entry block.
  MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
  assert(NewEntryBB.pred_size() == 1 &&
         "LLVM-IR entry block has a predecessor!?");
  // Move all the instructions from the current entry block to the
  // new entry block.
  NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
                    EntryBB->end());

  // Update the live-in information for the new entry block.
  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
    NewEntryBB.addLiveIn(LiveIn);
  NewEntryBB.sortUniqueLiveIns();
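
  // The spliced argument-lowering code may read physical registers (the
  // ABI-assigned argument registers) that were recorded as live-ins of the
  // scratch entry block, so those live-ins have to move with it.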

  // Get rid of the now empty basic block.
  EntryBB->removeSuccessor(&NewEntryBB);
  MF->remove(EntryBB);
  MF->deleteMachineBasicBlock(EntryBB);

  assert(&MF->front() == &NewEntryBB &&
         "New entry wasn't next in the list of basic block!");

  // Initialize stack protector information.
  StackProtector &SP = getAnalysis<StackProtector>();
  SP.copyToMachineFrameInfo(MF->getFrameInfo());

  return false;
}