1 //===----- TypePromotion.cpp ----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This is an opcode based type promotion pass for small types that would
11 /// otherwise be promoted during legalisation. This works around the limitations
12 /// of selection dag for cyclic regions. The search begins from icmp
13 /// instructions operands where a tree, consisting of non-wrapping or safe
14 /// wrapping instructions, is built, checked and promoted if possible.
16 //===----------------------------------------------------------------------===//
18 #include "llvm/CodeGen/TypePromotion.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/CodeGen/TargetSubtargetInfo.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Target/TargetMachine.h"
42 #define DEBUG_TYPE "type-promotion"
43 #define PASS_NAME "Type Promotion"
47 static cl::opt
<bool> DisablePromotion("disable-type-promotion", cl::Hidden
,
49 cl::desc("Disable type promotion pass"));
51 // The goal of this pass is to enable more efficient code generation for
52 // operations on narrow types (i.e. types with < 32-bits) and this is a
53 // motivating IR code example:
55 // define hidden i32 @cmp(i8 zeroext) {
56 // %2 = add i8 %0, -49
57 // %3 = icmp ult i8 %2, 3
61 // The issue here is that i8 is type-legalized to i32 because i8 is not a
62 // legal type. Thus, arithmetic is done in integer-precision, but then the
63 // byte value is masked out as follows:
65 // t19: i32 = add t4, Constant:i32<-49>
66 // t24: i32 = and t19, Constant:i32<255>
68 // Consequently, we generate code like this:
74 // This shows that masking out the byte value results in generation of
75 // the UXTB instruction. This is not optimal as r0 already contains the byte
76 // value we need, and so instead we can just generate:
81 // We achieve this by type promoting the IR to i32 like so for this example:
83 // define i32 @cmp(i8 zeroext %c) {
84 // %0 = zext i8 %c to i32
85 // %c.off = add i32 %0, -49
86 // %1 = icmp ult i32 %c.off, 3
90 // For this to be valid and legal, we need to prove that the i32 add is
91 // producing the same value as the i8 addition, and that e.g. no overflow
94 // A brief sketch of the algorithm and some terminology.
95 // We pattern match interesting IR patterns:
96 // - which have "sources": instructions producing narrow values (i8, i16), and
97 // - they have "sinks": instructions consuming these narrow values.
99 // We collect all instruction connecting sources and sinks in a worklist, so
100 // that we can mutate these instruction and perform type promotion when it is
106 unsigned PromotedWidth
= 0;
107 SetVector
<Value
*> &Visited
;
108 SetVector
<Value
*> &Sources
;
109 SetVector
<Instruction
*> &Sinks
;
110 SmallPtrSetImpl
<Instruction
*> &SafeWrap
;
111 SmallPtrSetImpl
<Instruction
*> &InstsToRemove
;
112 IntegerType
*ExtTy
= nullptr;
113 SmallPtrSet
<Value
*, 8> NewInsts
;
114 DenseMap
<Value
*, SmallVector
<Type
*, 4>> TruncTysMap
;
115 SmallPtrSet
<Value
*, 8> Promoted
;
117 void ReplaceAllUsersOfWith(Value
*From
, Value
*To
);
118 void ExtendSources();
119 void ConvertTruncs();
121 void TruncateSinks();
125 IRPromoter(LLVMContext
&C
, unsigned Width
, SetVector
<Value
*> &visited
,
126 SetVector
<Value
*> &sources
, SetVector
<Instruction
*> &sinks
,
127 SmallPtrSetImpl
<Instruction
*> &wrap
,
128 SmallPtrSetImpl
<Instruction
*> &instsToRemove
)
129 : Ctx(C
), PromotedWidth(Width
), Visited(visited
), Sources(sources
),
130 Sinks(sinks
), SafeWrap(wrap
), InstsToRemove(instsToRemove
) {
131 ExtTy
= IntegerType::get(Ctx
, PromotedWidth
);
137 class TypePromotionImpl
{
138 unsigned TypeSize
= 0;
139 LLVMContext
*Ctx
= nullptr;
140 unsigned RegisterBitWidth
= 0;
141 SmallPtrSet
<Value
*, 16> AllVisited
;
142 SmallPtrSet
<Instruction
*, 8> SafeToPromote
;
143 SmallPtrSet
<Instruction
*, 4> SafeWrap
;
144 SmallPtrSet
<Instruction
*, 4> InstsToRemove
;
146 // Does V have the same size result type as TypeSize.
147 bool EqualTypeSize(Value
*V
);
148 // Does V have the same size, or narrower, result type as TypeSize.
149 bool LessOrEqualTypeSize(Value
*V
);
150 // Does V have a result type that is wider than TypeSize.
151 bool GreaterThanTypeSize(Value
*V
);
152 // Does V have a result type that is narrower than TypeSize.
153 bool LessThanTypeSize(Value
*V
);
154 // Should V be a leaf in the promote tree?
155 bool isSource(Value
*V
);
156 // Should V be a root in the promotion tree?
157 bool isSink(Value
*V
);
158 // Should we change the result type of V? It will result in the users of V
160 bool shouldPromote(Value
*V
);
161 // Is I an add or a sub, which isn't marked as nuw, but where a wrapping
162 // result won't affect the computation?
163 bool isSafeWrap(Instruction
*I
);
164 // Can V have its integer type promoted, or can the type be ignored.
165 bool isSupportedType(Value
*V
);
166 // Is V an instruction with a supported opcode or another value that we can
167 // handle, such as constants and basic blocks.
168 bool isSupportedValue(Value
*V
);
169 // Is V an instruction thats result can trivially promoted, or has safe
171 bool isLegalToPromote(Value
*V
);
172 bool TryToPromote(Value
*V
, unsigned PromotedWidth
, const LoopInfo
&LI
);
175 bool run(Function
&F
, const TargetMachine
*TM
,
176 const TargetTransformInfo
&TTI
, const LoopInfo
&LI
);
179 class TypePromotionLegacy
: public FunctionPass
{
183 TypePromotionLegacy() : FunctionPass(ID
) {}
185 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
186 AU
.addRequired
<LoopInfoWrapperPass
>();
187 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
188 AU
.addRequired
<TargetPassConfig
>();
189 AU
.setPreservesCFG();
190 AU
.addPreserved
<LoopInfoWrapperPass
>();
193 StringRef
getPassName() const override
{ return PASS_NAME
; }
195 bool runOnFunction(Function
&F
) override
;
200 static bool GenerateSignBits(Instruction
*I
) {
201 unsigned Opc
= I
->getOpcode();
202 return Opc
== Instruction::AShr
|| Opc
== Instruction::SDiv
||
203 Opc
== Instruction::SRem
|| Opc
== Instruction::SExt
;
206 bool TypePromotionImpl::EqualTypeSize(Value
*V
) {
207 return V
->getType()->getScalarSizeInBits() == TypeSize
;
210 bool TypePromotionImpl::LessOrEqualTypeSize(Value
*V
) {
211 return V
->getType()->getScalarSizeInBits() <= TypeSize
;
214 bool TypePromotionImpl::GreaterThanTypeSize(Value
*V
) {
215 return V
->getType()->getScalarSizeInBits() > TypeSize
;
218 bool TypePromotionImpl::LessThanTypeSize(Value
*V
) {
219 return V
->getType()->getScalarSizeInBits() < TypeSize
;
222 /// Return true if the given value is a source in the use-def chain, producing
223 /// a narrow 'TypeSize' value. These values will be zext to start the promotion
224 /// of the tree to i32. We guarantee that these won't populate the upper bits
225 /// of the register. ZExt on the loads will be free, and the same for call
226 /// return values because we only accept ones that guarantee a zeroext ret val.
227 /// Many arguments will have the zeroext attribute too, so those would be free
229 bool TypePromotionImpl::isSource(Value
*V
) {
230 if (!isa
<IntegerType
>(V
->getType()))
233 // TODO Allow zext to be sources.
234 if (isa
<Argument
>(V
))
236 else if (isa
<LoadInst
>(V
))
238 else if (auto *Call
= dyn_cast
<CallInst
>(V
))
239 return Call
->hasRetAttr(Attribute::AttrKind::ZExt
);
240 else if (auto *Trunc
= dyn_cast
<TruncInst
>(V
))
241 return EqualTypeSize(Trunc
);
245 /// Return true if V will require any promoted values to be truncated for the
246 /// the IR to remain valid. We can't mutate the value type of these
248 bool TypePromotionImpl::isSink(Value
*V
) {
249 // TODO The truncate also isn't actually necessary because we would already
250 // proved that the data value is kept within the range of the original data
251 // type. We currently remove any truncs inserted for handling zext sinks.
254 // - points where the value in the register is being observed, such as an
255 // icmp, switch or store.
256 // - points where value types have to match, such as calls and returns.
257 // - zext are included to ease the transformation and are generally removed
259 if (auto *Store
= dyn_cast
<StoreInst
>(V
))
260 return LessOrEqualTypeSize(Store
->getValueOperand());
261 if (auto *Return
= dyn_cast
<ReturnInst
>(V
))
262 return LessOrEqualTypeSize(Return
->getReturnValue());
263 if (auto *ZExt
= dyn_cast
<ZExtInst
>(V
))
264 return GreaterThanTypeSize(ZExt
);
265 if (auto *Switch
= dyn_cast
<SwitchInst
>(V
))
266 return LessThanTypeSize(Switch
->getCondition());
267 if (auto *ICmp
= dyn_cast
<ICmpInst
>(V
))
268 return ICmp
->isSigned() || LessThanTypeSize(ICmp
->getOperand(0));
270 return isa
<CallInst
>(V
);
273 /// Return whether this instruction can safely wrap.
274 bool TypePromotionImpl::isSafeWrap(Instruction
*I
) {
275 // We can support a potentially wrapping instruction (I) if:
276 // - It is only used by an unsigned icmp.
277 // - The icmp uses a constant.
278 // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
279 // around zero to become a larger number than before.
280 // - The wrapping instruction (I) also uses a constant.
282 // We can then use the two constants to calculate whether the result would
283 // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
284 // just underflows the range, the icmp would give the same result whether the
285 // result has been truncated or not. We calculate this by:
286 // - Zero extending both constants, if needed, to RegisterBitWidth.
287 // - Take the absolute value of I's constant, adding this to the icmp const.
288 // - Check that this value is not out of range for small type. If it is, it
289 // means that it has underflowed enough to wrap around the icmp constant.
293 // %sub = sub i8 %a, 2
294 // %cmp = icmp ule i8 %sub, 254
296 // If %a = 0, %sub = -2 == FE == 254
297 // But if this is evalulated as a i32
298 // %sub = -2 == FF FF FF FE == 4294967294
299 // So the unsigned compares (i8 and i32) would not yield the same result.
301 // Another way to look at it is:
305 // And we can't represent 256 in the i8 format, so we don't support it.
310 // %cmp = icmp ule i8 %sub, 254
312 // If %a = 0, %sub = -1 == FF == 255
314 // %sub = -1 == FF FF FF FF == 4294967295
316 // In this case, the unsigned compare results would be the same and this
317 // would also be true for ult, uge and ugt:
318 // - (255 < 254) == (0xFFFFFFFF < 254) == false
319 // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
320 // - (255 > 254) == (0xFFFFFFFF > 254) == true
321 // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
323 // To demonstrate why we can't handle increasing values:
325 // %add = add i8 %a, 2
326 // %cmp = icmp ult i8 %add, 127
328 // If %a = 254, %add = 256 == (i8 1)
332 // (1 < 127) != (256 < 127)
334 unsigned Opc
= I
->getOpcode();
335 if (Opc
!= Instruction::Add
&& Opc
!= Instruction::Sub
)
338 if (!I
->hasOneUse() || !isa
<ICmpInst
>(*I
->user_begin()) ||
339 !isa
<ConstantInt
>(I
->getOperand(1)))
342 // Don't support an icmp that deals with sign bits.
343 auto *CI
= cast
<ICmpInst
>(*I
->user_begin());
344 if (CI
->isSigned() || CI
->isEquality())
347 ConstantInt
*ICmpConstant
= nullptr;
348 if (auto *Const
= dyn_cast
<ConstantInt
>(CI
->getOperand(0)))
349 ICmpConstant
= Const
;
350 else if (auto *Const
= dyn_cast
<ConstantInt
>(CI
->getOperand(1)))
351 ICmpConstant
= Const
;
355 const APInt
&ICmpConst
= ICmpConstant
->getValue();
356 APInt OverflowConst
= cast
<ConstantInt
>(I
->getOperand(1))->getValue();
357 if (Opc
== Instruction::Sub
)
358 OverflowConst
= -OverflowConst
;
359 if (!OverflowConst
.isNonPositive())
362 // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
363 // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
364 // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
365 if (OverflowConst
.sgt(ICmpConst
)) {
366 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
367 << "const of " << *I
<< "\n");
371 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
372 << "const of " << *I
<< " and " << *CI
<< "\n");
380 bool TypePromotionImpl::shouldPromote(Value
*V
) {
381 if (!isa
<IntegerType
>(V
->getType()) || isSink(V
))
387 auto *I
= dyn_cast
<Instruction
>(V
);
391 if (isa
<ICmpInst
>(I
))
397 /// Return whether we can safely mutate V's type to ExtTy without having to be
398 /// concerned with zero extending or truncation.
399 static bool isPromotedResultSafe(Instruction
*I
) {
400 if (GenerateSignBits(I
))
403 if (!isa
<OverflowingBinaryOperator
>(I
))
406 return I
->hasNoUnsignedWrap();
409 void IRPromoter::ReplaceAllUsersOfWith(Value
*From
, Value
*To
) {
410 SmallVector
<Instruction
*, 4> Users
;
411 Instruction
*InstTo
= dyn_cast
<Instruction
>(To
);
412 bool ReplacedAll
= true;
414 LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From
<< " with " << *To
417 for (Use
&U
: From
->uses()) {
418 auto *User
= cast
<Instruction
>(U
.getUser());
419 if (InstTo
&& User
->isIdenticalTo(InstTo
)) {
423 Users
.push_back(User
);
426 for (auto *U
: Users
)
427 U
->replaceUsesOfWith(From
, To
);
430 if (auto *I
= dyn_cast
<Instruction
>(From
))
431 InstsToRemove
.insert(I
);
434 void IRPromoter::ExtendSources() {
435 IRBuilder
<> Builder
{Ctx
};
437 auto InsertZExt
= [&](Value
*V
, Instruction
*InsertPt
) {
438 assert(V
->getType() != ExtTy
&& "zext already extends to i32");
439 LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V
<< "\n");
440 Builder
.SetInsertPoint(InsertPt
);
441 if (auto *I
= dyn_cast
<Instruction
>(V
))
442 Builder
.SetCurrentDebugLocation(I
->getDebugLoc());
444 Value
*ZExt
= Builder
.CreateZExt(V
, ExtTy
);
445 if (auto *I
= dyn_cast
<Instruction
>(ZExt
)) {
446 if (isa
<Argument
>(V
))
447 I
->moveBefore(InsertPt
);
449 I
->moveAfter(InsertPt
);
453 ReplaceAllUsersOfWith(V
, ZExt
);
456 // Now, insert extending instructions between the sources and their users.
457 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
458 for (auto *V
: Sources
) {
459 LLVM_DEBUG(dbgs() << " - " << *V
<< "\n");
460 if (auto *I
= dyn_cast
<Instruction
>(V
))
462 else if (auto *Arg
= dyn_cast
<Argument
>(V
)) {
463 BasicBlock
&BB
= Arg
->getParent()->front();
464 InsertZExt(Arg
, &*BB
.getFirstInsertionPt());
466 llvm_unreachable("unhandled source that needs extending");
472 void IRPromoter::PromoteTree() {
473 LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
475 // Mutate the types of the instructions within the tree. Here we handle
476 // constant operands.
477 for (auto *V
: Visited
) {
478 if (Sources
.count(V
))
481 auto *I
= cast
<Instruction
>(V
);
485 for (unsigned i
= 0, e
= I
->getNumOperands(); i
< e
; ++i
) {
486 Value
*Op
= I
->getOperand(i
);
487 if ((Op
->getType() == ExtTy
) || !isa
<IntegerType
>(Op
->getType()))
490 if (auto *Const
= dyn_cast
<ConstantInt
>(Op
)) {
491 // For subtract, we don't need to sext the constant. We only put it in
492 // SafeWrap because SafeWrap.size() is used elsewhere.
493 // For cmp, we need to sign extend a constant appearing in either
494 // operand. For add, we should only sign extend the RHS.
496 ConstantInt::get(Const
->getContext(),
497 (SafeWrap
.contains(I
) &&
498 (I
->getOpcode() == Instruction::ICmp
|| i
== 1) &&
499 I
->getOpcode() != Instruction::Sub
)
500 ? Const
->getValue().sext(PromotedWidth
)
501 : Const
->getValue().zext(PromotedWidth
));
502 I
->setOperand(i
, NewConst
);
503 } else if (isa
<UndefValue
>(Op
))
504 I
->setOperand(i
, ConstantInt::get(ExtTy
, 0));
507 // Mutate the result type, unless this is an icmp or switch.
508 if (!isa
<ICmpInst
>(I
) && !isa
<SwitchInst
>(I
)) {
509 I
->mutateType(ExtTy
);
515 void IRPromoter::TruncateSinks() {
516 LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
518 IRBuilder
<> Builder
{Ctx
};
520 auto InsertTrunc
= [&](Value
*V
, Type
*TruncTy
) -> Instruction
* {
521 if (!isa
<Instruction
>(V
) || !isa
<IntegerType
>(V
->getType()))
524 if ((!Promoted
.count(V
) && !NewInsts
.count(V
)) || Sources
.count(V
))
527 LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy
<< " Trunc for "
529 Builder
.SetInsertPoint(cast
<Instruction
>(V
));
530 auto *Trunc
= dyn_cast
<Instruction
>(Builder
.CreateTrunc(V
, TruncTy
));
532 NewInsts
.insert(Trunc
);
536 // Fix up any stores or returns that use the results of the promoted
538 for (auto *I
: Sinks
) {
539 LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I
<< "\n");
541 // Handle calls separately as we need to iterate over arg operands.
542 if (auto *Call
= dyn_cast
<CallInst
>(I
)) {
543 for (unsigned i
= 0; i
< Call
->arg_size(); ++i
) {
544 Value
*Arg
= Call
->getArgOperand(i
);
545 Type
*Ty
= TruncTysMap
[Call
][i
];
546 if (Instruction
*Trunc
= InsertTrunc(Arg
, Ty
)) {
547 Trunc
->moveBefore(Call
);
548 Call
->setArgOperand(i
, Trunc
);
554 // Special case switches because we need to truncate the condition.
555 if (auto *Switch
= dyn_cast
<SwitchInst
>(I
)) {
556 Type
*Ty
= TruncTysMap
[Switch
][0];
557 if (Instruction
*Trunc
= InsertTrunc(Switch
->getCondition(), Ty
)) {
558 Trunc
->moveBefore(Switch
);
559 Switch
->setCondition(Trunc
);
564 // Don't insert a trunc for a zext which can still legally promote.
565 // Nor insert a trunc when the input value to that trunc has the same width
566 // as the zext we are inserting it for. When this happens the input operand
567 // for the zext will be promoted to the same width as the zext's return type
568 // rendering that zext unnecessary. This zext gets removed before the end
570 if (auto ZExt
= dyn_cast
<ZExtInst
>(I
))
571 if (ZExt
->getType()->getScalarSizeInBits() >= PromotedWidth
)
574 // Now handle the others.
575 for (unsigned i
= 0; i
< I
->getNumOperands(); ++i
) {
576 Type
*Ty
= TruncTysMap
[I
][i
];
577 if (Instruction
*Trunc
= InsertTrunc(I
->getOperand(i
), Ty
)) {
578 Trunc
->moveBefore(I
);
579 I
->setOperand(i
, Trunc
);
585 void IRPromoter::Cleanup() {
586 LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
587 // Some zexts will now have become redundant, along with their trunc
588 // operands, so remove them.
589 for (auto *V
: Visited
) {
590 if (!isa
<ZExtInst
>(V
))
593 auto ZExt
= cast
<ZExtInst
>(V
);
594 if (ZExt
->getDestTy() != ExtTy
)
597 Value
*Src
= ZExt
->getOperand(0);
598 if (ZExt
->getSrcTy() == ZExt
->getDestTy()) {
599 LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
601 ReplaceAllUsersOfWith(ZExt
, Src
);
605 // We've inserted a trunc for a zext sink, but we already know that the
606 // input is in range, negating the need for the trunc.
607 if (NewInsts
.count(Src
) && isa
<TruncInst
>(Src
)) {
608 auto *Trunc
= cast
<TruncInst
>(Src
);
609 assert(Trunc
->getOperand(0)->getType() == ExtTy
&&
610 "expected inserted trunc to be operating on i32");
611 ReplaceAllUsersOfWith(ZExt
, Trunc
->getOperand(0));
615 for (auto *I
: InstsToRemove
) {
616 LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I
<< "\n");
617 I
->dropAllReferences();
621 void IRPromoter::ConvertTruncs() {
622 LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
623 IRBuilder
<> Builder
{Ctx
};
625 for (auto *V
: Visited
) {
626 if (!isa
<TruncInst
>(V
) || Sources
.count(V
))
629 auto *Trunc
= cast
<TruncInst
>(V
);
630 Builder
.SetInsertPoint(Trunc
);
631 IntegerType
*SrcTy
= cast
<IntegerType
>(Trunc
->getOperand(0)->getType());
632 IntegerType
*DestTy
= cast
<IntegerType
>(TruncTysMap
[Trunc
][0]);
634 unsigned NumBits
= DestTy
->getScalarSizeInBits();
636 ConstantInt::get(SrcTy
, APInt::getMaxValue(NumBits
).getZExtValue());
637 Value
*Masked
= Builder
.CreateAnd(Trunc
->getOperand(0), Mask
);
639 Masked
= Builder
.CreateTrunc(Masked
, ExtTy
);
641 if (auto *I
= dyn_cast
<Instruction
>(Masked
))
644 ReplaceAllUsersOfWith(Trunc
, Masked
);
648 void IRPromoter::Mutate() {
649 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
650 << PromotedWidth
<< "-bits\n");
652 // Cache original types of the values that will likely need truncating
653 for (auto *I
: Sinks
) {
654 if (auto *Call
= dyn_cast
<CallInst
>(I
)) {
655 for (Value
*Arg
: Call
->args())
656 TruncTysMap
[Call
].push_back(Arg
->getType());
657 } else if (auto *Switch
= dyn_cast
<SwitchInst
>(I
))
658 TruncTysMap
[I
].push_back(Switch
->getCondition()->getType());
660 for (unsigned i
= 0; i
< I
->getNumOperands(); ++i
)
661 TruncTysMap
[I
].push_back(I
->getOperand(i
)->getType());
664 for (auto *V
: Visited
) {
665 if (!isa
<TruncInst
>(V
) || Sources
.count(V
))
667 auto *Trunc
= cast
<TruncInst
>(V
);
668 TruncTysMap
[Trunc
].push_back(Trunc
->getDestTy());
671 // Insert zext instructions between sources and their users.
674 // Promote visited instructions, mutating their types in place.
677 // Convert any truncs, that aren't sources, into AND masks.
680 // Insert trunc instructions for use by calls, stores etc...
683 // Finally, remove unecessary zexts and truncs, delete old instructions and
684 // clear the data structures.
687 LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
690 /// We disallow booleans to make life easier when dealing with icmps but allow
691 /// any other integer that fits in a scalar register. Void types are accepted
692 /// so we can handle switches.
693 bool TypePromotionImpl::isSupportedType(Value
*V
) {
694 Type
*Ty
= V
->getType();
696 // Allow voids and pointers, these won't be promoted.
697 if (Ty
->isVoidTy() || Ty
->isPointerTy())
700 if (!isa
<IntegerType
>(Ty
) || cast
<IntegerType
>(Ty
)->getBitWidth() == 1 ||
701 cast
<IntegerType
>(Ty
)->getBitWidth() > RegisterBitWidth
)
704 return LessOrEqualTypeSize(V
);
707 /// We accept most instructions, as well as Arguments and ConstantInsts. We
708 /// Disallow casts other than zext and truncs and only allow calls if their
709 /// return value is zeroext. We don't allow opcodes that can introduce sign
711 bool TypePromotionImpl::isSupportedValue(Value
*V
) {
712 if (auto *I
= dyn_cast
<Instruction
>(V
)) {
713 switch (I
->getOpcode()) {
715 return isa
<BinaryOperator
>(I
) && isSupportedType(I
) &&
716 !GenerateSignBits(I
);
717 case Instruction::GetElementPtr
:
718 case Instruction::Store
:
719 case Instruction::Br
:
720 case Instruction::Switch
:
722 case Instruction::PHI
:
723 case Instruction::Select
:
724 case Instruction::Ret
:
725 case Instruction::Load
:
726 case Instruction::Trunc
:
727 return isSupportedType(I
);
728 case Instruction::BitCast
:
729 return I
->getOperand(0)->getType() == I
->getType();
730 case Instruction::ZExt
:
731 return isSupportedType(I
->getOperand(0));
732 case Instruction::ICmp
:
733 // Now that we allow small types than TypeSize, only allow icmp of
734 // TypeSize because they will require a trunc to be legalised.
735 // TODO: Allow icmp of smaller types, and calculate at the end
736 // whether the transform would be beneficial.
737 if (isa
<PointerType
>(I
->getOperand(0)->getType()))
739 return EqualTypeSize(I
->getOperand(0));
740 case Instruction::Call
: {
741 // Special cases for calls as we need to check for zeroext
742 // TODO We should accept calls even if they don't have zeroext, as they
743 // can still be sinks.
744 auto *Call
= cast
<CallInst
>(I
);
745 return isSupportedType(Call
) &&
746 Call
->hasRetAttr(Attribute::AttrKind::ZExt
);
749 } else if (isa
<Constant
>(V
) && !isa
<ConstantExpr
>(V
)) {
750 return isSupportedType(V
);
751 } else if (isa
<Argument
>(V
))
752 return isSupportedType(V
);
754 return isa
<BasicBlock
>(V
);
757 /// Check that the type of V would be promoted and that the original type is
758 /// smaller than the targeted promoted type. Check that we're not trying to
759 /// promote something larger than our base 'TypeSize' type.
760 bool TypePromotionImpl::isLegalToPromote(Value
*V
) {
761 auto *I
= dyn_cast
<Instruction
>(V
);
765 if (SafeToPromote
.count(I
))
768 if (isPromotedResultSafe(I
) || isSafeWrap(I
)) {
769 SafeToPromote
.insert(I
);
775 bool TypePromotionImpl::TryToPromote(Value
*V
, unsigned PromotedWidth
,
776 const LoopInfo
&LI
) {
777 Type
*OrigTy
= V
->getType();
778 TypeSize
= OrigTy
->getPrimitiveSizeInBits().getFixedValue();
779 SafeToPromote
.clear();
782 if (!isSupportedValue(V
) || !shouldPromote(V
) || !isLegalToPromote(V
))
785 LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V
<< ", from "
786 << TypeSize
<< " bits to " << PromotedWidth
<< "\n");
788 SetVector
<Value
*> WorkList
;
789 SetVector
<Value
*> Sources
;
790 SetVector
<Instruction
*> Sinks
;
791 SetVector
<Value
*> CurrentVisited
;
794 // Return true if V was added to the worklist as a supported instruction,
795 // if it was already visited, or if we don't need to explore it (e.g.
796 // pointer values and GEPs), and false otherwise.
797 auto AddLegalInst
= [&](Value
*V
) {
798 if (CurrentVisited
.count(V
))
801 // Ignore GEPs because they don't need promoting and the constant indices
802 // will prevent the transformation.
803 if (isa
<GetElementPtrInst
>(V
))
806 if (!isSupportedValue(V
) || (shouldPromote(V
) && !isLegalToPromote(V
))) {
807 LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V
<< "\n");
815 // Iterate through, and add to, a tree of operands and users in the use-def.
816 while (!WorkList
.empty()) {
817 Value
*V
= WorkList
.pop_back_val();
818 if (CurrentVisited
.count(V
))
821 // Ignore non-instructions, other than arguments.
822 if (!isa
<Instruction
>(V
) && !isSource(V
))
825 // If we've already visited this value from somewhere, bail now because
826 // the tree has already been explored.
827 // TODO: This could limit the transform, ie if we try to promote something
828 // from an i8 and fail first, before trying an i16.
829 if (AllVisited
.count(V
))
832 CurrentVisited
.insert(V
);
833 AllVisited
.insert(V
);
835 // Calls can be both sources and sinks.
837 Sinks
.insert(cast
<Instruction
>(V
));
842 if (!isSink(V
) && !isSource(V
)) {
843 if (auto *I
= dyn_cast
<Instruction
>(V
)) {
844 // Visit operands of any instruction visited.
845 for (auto &U
: I
->operands()) {
846 if (!AddLegalInst(U
))
852 // Don't visit users of a node which isn't going to be mutated unless its a
854 if (isSource(V
) || shouldPromote(V
)) {
855 for (Use
&U
: V
->uses()) {
856 if (!AddLegalInst(U
.getUser()))
863 dbgs() << "IR Promotion: Visited nodes:\n";
864 for (auto *I
: CurrentVisited
)
868 unsigned ToPromote
= 0;
869 unsigned NonFreeArgs
= 0;
870 unsigned NonLoopSources
= 0, LoopSinks
= 0;
871 SmallPtrSet
<BasicBlock
*, 4> Blocks
;
872 for (auto *CV
: CurrentVisited
) {
873 if (auto *I
= dyn_cast
<Instruction
>(CV
))
874 Blocks
.insert(I
->getParent());
876 if (Sources
.count(CV
)) {
877 if (auto *Arg
= dyn_cast
<Argument
>(CV
))
878 if (!Arg
->hasZExtAttr() && !Arg
->hasSExtAttr())
880 if (!isa
<Instruction
>(CV
) ||
881 !LI
.getLoopFor(cast
<Instruction
>(CV
)->getParent()))
886 if (isa
<PHINode
>(CV
))
888 if (LI
.getLoopFor(cast
<Instruction
>(CV
)->getParent()))
890 if (Sinks
.count(cast
<Instruction
>(CV
)))
895 // DAG optimizations should be able to handle these cases better, especially
896 // for function arguments.
897 if (!isa
<PHINode
>(V
) && !(LoopSinks
&& NonLoopSources
) &&
898 (ToPromote
< 2 || (Blocks
.size() == 1 && NonFreeArgs
> SafeWrap
.size())))
901 IRPromoter
Promoter(*Ctx
, PromotedWidth
, CurrentVisited
, Sources
, Sinks
,
902 SafeWrap
, InstsToRemove
);
907 bool TypePromotionImpl::run(Function
&F
, const TargetMachine
*TM
,
908 const TargetTransformInfo
&TTI
,
909 const LoopInfo
&LI
) {
910 if (DisablePromotion
)
913 LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F
.getName() << "\n");
916 SafeToPromote
.clear();
918 bool MadeChange
= false;
919 const DataLayout
&DL
= F
.getParent()->getDataLayout();
920 const TargetSubtargetInfo
*SubtargetInfo
= TM
->getSubtargetImpl(F
);
921 const TargetLowering
*TLI
= SubtargetInfo
->getTargetLowering();
923 TTI
.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar
).getFixedValue();
924 Ctx
= &F
.getParent()->getContext();
926 // Return the preferred integer width of the instruction, or zero if we
928 auto GetPromoteWidth
= [&](Instruction
*I
) -> uint32_t {
929 if (!isa
<IntegerType
>(I
->getType()))
932 EVT SrcVT
= TLI
->getValueType(DL
, I
->getType());
933 if (SrcVT
.isSimple() && TLI
->isTypeLegal(SrcVT
.getSimpleVT()))
936 if (TLI
->getTypeAction(*Ctx
, SrcVT
) != TargetLowering::TypePromoteInteger
)
939 EVT PromotedVT
= TLI
->getTypeToTransformTo(*Ctx
, SrcVT
);
940 if (RegisterBitWidth
< PromotedVT
.getFixedSizeInBits()) {
941 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
942 << "for promoted type\n");
946 // TODO: Should we prefer to use RegisterBitWidth instead?
947 return PromotedVT
.getFixedSizeInBits();
950 auto BBIsInLoop
= [&](BasicBlock
*BB
) -> bool {
957 for (BasicBlock
&BB
: F
) {
958 for (Instruction
&I
: BB
) {
959 if (AllVisited
.count(&I
))
962 if (isa
<ZExtInst
>(&I
) && isa
<PHINode
>(I
.getOperand(0)) &&
963 isa
<IntegerType
>(I
.getType()) && BBIsInLoop(&BB
)) {
964 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: "
965 << *I
.getOperand(0) << "\n");
966 EVT ZExtVT
= TLI
->getValueType(DL
, I
.getType());
967 Instruction
*Phi
= static_cast<Instruction
*>(I
.getOperand(0));
968 auto PromoteWidth
= ZExtVT
.getFixedSizeInBits();
969 if (RegisterBitWidth
< PromoteWidth
) {
970 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target "
971 << "register for ZExt type\n");
974 MadeChange
|= TryToPromote(Phi
, PromoteWidth
, LI
);
975 } else if (auto *ICmp
= dyn_cast
<ICmpInst
>(&I
)) {
976 // Search up from icmps to try to promote their operands.
977 // Skip signed or pointer compares
978 if (ICmp
->isSigned())
981 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp
<< "\n");
983 for (auto &Op
: ICmp
->operands()) {
984 if (auto *OpI
= dyn_cast
<Instruction
>(Op
)) {
985 if (auto PromotedWidth
= GetPromoteWidth(OpI
)) {
986 MadeChange
|= TryToPromote(OpI
, PromotedWidth
, LI
);
993 if (!InstsToRemove
.empty()) {
994 for (auto *I
: InstsToRemove
)
995 I
->eraseFromParent();
996 InstsToRemove
.clear();
1001 SafeToPromote
.clear();
1007 INITIALIZE_PASS_BEGIN(TypePromotionLegacy
, DEBUG_TYPE
, PASS_NAME
, false, false)
1008 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
1009 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig
)
1010 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
1011 INITIALIZE_PASS_END(TypePromotionLegacy
, DEBUG_TYPE
, PASS_NAME
, false, false)
1013 char TypePromotionLegacy::ID
= 0;
1015 bool TypePromotionLegacy::runOnFunction(Function
&F
) {
1016 if (skipFunction(F
))
1019 auto &TPC
= getAnalysis
<TargetPassConfig
>();
1020 auto *TM
= &TPC
.getTM
<TargetMachine
>();
1021 auto &TTI
= getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
1022 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
1024 TypePromotionImpl TP
;
1025 return TP
.run(F
, TM
, TTI
, LI
);
1028 FunctionPass
*llvm::createTypePromotionLegacyPass() {
1029 return new TypePromotionLegacy();
1032 PreservedAnalyses
TypePromotionPass::run(Function
&F
,
1033 FunctionAnalysisManager
&AM
) {
1034 auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
1035 auto &LI
= AM
.getResult
<LoopAnalysis
>(F
);
1036 TypePromotionImpl TP
;
1038 bool Changed
= TP
.run(F
, TM
, TTI
, LI
);
1040 return PreservedAnalyses::all();
1042 PreservedAnalyses PA
;
1043 PA
.preserveSet
<CFGAnalyses
>();
1044 PA
.preserve
<LoopAnalysis
>();