1 //===----- TypePromotion.cpp ----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This is an opcode based type promotion pass for small types that would
11 /// otherwise be promoted during legalisation. This works around the limitations
12 /// of selection dag for cyclic regions. The search begins from icmp
13 /// instructions operands where a tree, consisting of non-wrapping or safe
14 /// wrapping instructions, is built, checked and promoted if possible.
16 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstrTypes.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Target/TargetMachine.h"
41 #define DEBUG_TYPE "type-promotion"
42 #define PASS_NAME "Type Promotion"
46 static cl::opt
<bool> DisablePromotion("disable-type-promotion", cl::Hidden
,
48 cl::desc("Disable type promotion pass"));
50 // The goal of this pass is to enable more efficient code generation for
51 // operations on narrow types (i.e. types with < 32-bits) and this is a
52 // motivating IR code example:
54 // define hidden i32 @cmp(i8 zeroext) {
55 // %2 = add i8 %0, -49
56 // %3 = icmp ult i8 %2, 3
60 // The issue here is that i8 is type-legalized to i32 because i8 is not a
61 // legal type. Thus, arithmetic is done in integer-precision, but then the
62 // byte value is masked out as follows:
64 // t19: i32 = add t4, Constant:i32<-49>
65 // t24: i32 = and t19, Constant:i32<255>
67 // Consequently, we generate code like this:
73 // This shows that masking out the byte value results in generation of
74 // the UXTB instruction. This is not optimal as r0 already contains the byte
75 // value we need, and so instead we can just generate:
80 // We achieve this by type promoting the IR to i32 like so for this example:
82 // define i32 @cmp(i8 zeroext %c) {
83 // %0 = zext i8 %c to i32
84 // %c.off = add i32 %0, -49
85 // %1 = icmp ult i32 %c.off, 3
89 // For this to be valid and legal, we need to prove that the i32 add is
90 // producing the same value as the i8 addition, and that e.g. no overflow
93 // A brief sketch of the algorithm and some terminology.
94 // We pattern match interesting IR patterns:
95 // - which have "sources": instructions producing narrow values (i8, i16), and
96 // - they have "sinks": instructions consuming these narrow values.
98 // We collect all instruction connecting sources and sinks in a worklist, so
99 // that we can mutate these instruction and perform type promotion when it is
105 unsigned PromotedWidth
= 0;
106 SetVector
<Value
*> &Visited
;
107 SetVector
<Value
*> &Sources
;
108 SetVector
<Instruction
*> &Sinks
;
109 SmallPtrSetImpl
<Instruction
*> &SafeWrap
;
110 SmallPtrSetImpl
<Instruction
*> &InstsToRemove
;
111 IntegerType
*ExtTy
= nullptr;
112 SmallPtrSet
<Value
*, 8> NewInsts
;
113 DenseMap
<Value
*, SmallVector
<Type
*, 4>> TruncTysMap
;
114 SmallPtrSet
<Value
*, 8> Promoted
;
116 void ReplaceAllUsersOfWith(Value
*From
, Value
*To
);
117 void ExtendSources();
118 void ConvertTruncs();
120 void TruncateSinks();
124 IRPromoter(LLVMContext
&C
, unsigned Width
, SetVector
<Value
*> &visited
,
125 SetVector
<Value
*> &sources
, SetVector
<Instruction
*> &sinks
,
126 SmallPtrSetImpl
<Instruction
*> &wrap
,
127 SmallPtrSetImpl
<Instruction
*> &instsToRemove
)
128 : Ctx(C
), PromotedWidth(Width
), Visited(visited
), Sources(sources
),
129 Sinks(sinks
), SafeWrap(wrap
), InstsToRemove(instsToRemove
) {
130 ExtTy
= IntegerType::get(Ctx
, PromotedWidth
);
136 class TypePromotion
: public FunctionPass
{
137 unsigned TypeSize
= 0;
138 LLVMContext
*Ctx
= nullptr;
139 unsigned RegisterBitWidth
= 0;
140 SmallPtrSet
<Value
*, 16> AllVisited
;
141 SmallPtrSet
<Instruction
*, 8> SafeToPromote
;
142 SmallPtrSet
<Instruction
*, 4> SafeWrap
;
143 SmallPtrSet
<Instruction
*, 4> InstsToRemove
;
145 // Does V have the same size result type as TypeSize.
146 bool EqualTypeSize(Value
*V
);
147 // Does V have the same size, or narrower, result type as TypeSize.
148 bool LessOrEqualTypeSize(Value
*V
);
149 // Does V have a result type that is wider than TypeSize.
150 bool GreaterThanTypeSize(Value
*V
);
151 // Does V have a result type that is narrower than TypeSize.
152 bool LessThanTypeSize(Value
*V
);
153 // Should V be a leaf in the promote tree?
154 bool isSource(Value
*V
);
155 // Should V be a root in the promotion tree?
156 bool isSink(Value
*V
);
157 // Should we change the result type of V? It will result in the users of V
159 bool shouldPromote(Value
*V
);
160 // Is I an add or a sub, which isn't marked as nuw, but where a wrapping
161 // result won't affect the computation?
162 bool isSafeWrap(Instruction
*I
);
163 // Can V have its integer type promoted, or can the type be ignored.
164 bool isSupportedType(Value
*V
);
165 // Is V an instruction with a supported opcode or another value that we can
166 // handle, such as constants and basic blocks.
167 bool isSupportedValue(Value
*V
);
168 // Is V an instruction thats result can trivially promoted, or has safe
170 bool isLegalToPromote(Value
*V
);
171 bool TryToPromote(Value
*V
, unsigned PromotedWidth
);
176 TypePromotion() : FunctionPass(ID
) {}
178 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
179 AU
.addRequired
<LoopInfoWrapperPass
>();
180 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
181 AU
.addRequired
<TargetPassConfig
>();
182 AU
.setPreservesCFG();
183 AU
.addPreserved
<LoopInfoWrapperPass
>();
186 StringRef
getPassName() const override
{ return PASS_NAME
; }
188 bool runOnFunction(Function
&F
) override
;
193 static bool GenerateSignBits(Instruction
*I
) {
194 unsigned Opc
= I
->getOpcode();
195 return Opc
== Instruction::AShr
|| Opc
== Instruction::SDiv
||
196 Opc
== Instruction::SRem
|| Opc
== Instruction::SExt
;
199 bool TypePromotion::EqualTypeSize(Value
*V
) {
200 return V
->getType()->getScalarSizeInBits() == TypeSize
;
203 bool TypePromotion::LessOrEqualTypeSize(Value
*V
) {
204 return V
->getType()->getScalarSizeInBits() <= TypeSize
;
207 bool TypePromotion::GreaterThanTypeSize(Value
*V
) {
208 return V
->getType()->getScalarSizeInBits() > TypeSize
;
211 bool TypePromotion::LessThanTypeSize(Value
*V
) {
212 return V
->getType()->getScalarSizeInBits() < TypeSize
;
215 /// Return true if the given value is a source in the use-def chain, producing
216 /// a narrow 'TypeSize' value. These values will be zext to start the promotion
217 /// of the tree to i32. We guarantee that these won't populate the upper bits
218 /// of the register. ZExt on the loads will be free, and the same for call
219 /// return values because we only accept ones that guarantee a zeroext ret val.
220 /// Many arguments will have the zeroext attribute too, so those would be free
222 bool TypePromotion::isSource(Value
*V
) {
223 if (!isa
<IntegerType
>(V
->getType()))
226 // TODO Allow zext to be sources.
227 if (isa
<Argument
>(V
))
229 else if (isa
<LoadInst
>(V
))
231 else if (isa
<BitCastInst
>(V
))
233 else if (auto *Call
= dyn_cast
<CallInst
>(V
))
234 return Call
->hasRetAttr(Attribute::AttrKind::ZExt
);
235 else if (auto *Trunc
= dyn_cast
<TruncInst
>(V
))
236 return EqualTypeSize(Trunc
);
240 /// Return true if V will require any promoted values to be truncated for the
241 /// the IR to remain valid. We can't mutate the value type of these
243 bool TypePromotion::isSink(Value
*V
) {
244 // TODO The truncate also isn't actually necessary because we would already
245 // proved that the data value is kept within the range of the original data
246 // type. We currently remove any truncs inserted for handling zext sinks.
249 // - points where the value in the register is being observed, such as an
250 // icmp, switch or store.
251 // - points where value types have to match, such as calls and returns.
252 // - zext are included to ease the transformation and are generally removed
254 if (auto *Store
= dyn_cast
<StoreInst
>(V
))
255 return LessOrEqualTypeSize(Store
->getValueOperand());
256 if (auto *Return
= dyn_cast
<ReturnInst
>(V
))
257 return LessOrEqualTypeSize(Return
->getReturnValue());
258 if (auto *ZExt
= dyn_cast
<ZExtInst
>(V
))
259 return GreaterThanTypeSize(ZExt
);
260 if (auto *Switch
= dyn_cast
<SwitchInst
>(V
))
261 return LessThanTypeSize(Switch
->getCondition());
262 if (auto *ICmp
= dyn_cast
<ICmpInst
>(V
))
263 return ICmp
->isSigned() || LessThanTypeSize(ICmp
->getOperand(0));
265 return isa
<CallInst
>(V
);
268 /// Return whether this instruction can safely wrap.
269 bool TypePromotion::isSafeWrap(Instruction
*I
) {
270 // We can support a potentially wrapping instruction (I) if:
271 // - It is only used by an unsigned icmp.
272 // - The icmp uses a constant.
273 // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
274 // around zero to become a larger number than before.
275 // - The wrapping instruction (I) also uses a constant.
277 // We can then use the two constants to calculate whether the result would
278 // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
279 // just underflows the range, the icmp would give the same result whether the
280 // result has been truncated or not. We calculate this by:
281 // - Zero extending both constants, if needed, to RegisterBitWidth.
282 // - Take the absolute value of I's constant, adding this to the icmp const.
283 // - Check that this value is not out of range for small type. If it is, it
284 // means that it has underflowed enough to wrap around the icmp constant.
288 // %sub = sub i8 %a, 2
289 // %cmp = icmp ule i8 %sub, 254
291 // If %a = 0, %sub = -2 == FE == 254
292 // But if this is evalulated as a i32
293 // %sub = -2 == FF FF FF FE == 4294967294
294 // So the unsigned compares (i8 and i32) would not yield the same result.
296 // Another way to look at it is:
300 // And we can't represent 256 in the i8 format, so we don't support it.
305 // %cmp = icmp ule i8 %sub, 254
307 // If %a = 0, %sub = -1 == FF == 255
309 // %sub = -1 == FF FF FF FF == 4294967295
311 // In this case, the unsigned compare results would be the same and this
312 // would also be true for ult, uge and ugt:
313 // - (255 < 254) == (0xFFFFFFFF < 254) == false
314 // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
315 // - (255 > 254) == (0xFFFFFFFF > 254) == true
316 // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
318 // To demonstrate why we can't handle increasing values:
320 // %add = add i8 %a, 2
321 // %cmp = icmp ult i8 %add, 127
323 // If %a = 254, %add = 256 == (i8 1)
327 // (1 < 127) != (256 < 127)
329 unsigned Opc
= I
->getOpcode();
330 if (Opc
!= Instruction::Add
&& Opc
!= Instruction::Sub
)
333 if (!I
->hasOneUse() || !isa
<ICmpInst
>(*I
->user_begin()) ||
334 !isa
<ConstantInt
>(I
->getOperand(1)))
337 // Don't support an icmp that deals with sign bits.
338 auto *CI
= cast
<ICmpInst
>(*I
->user_begin());
339 if (CI
->isSigned() || CI
->isEquality())
342 ConstantInt
*ICmpConstant
= nullptr;
343 if (auto *Const
= dyn_cast
<ConstantInt
>(CI
->getOperand(0)))
344 ICmpConstant
= Const
;
345 else if (auto *Const
= dyn_cast
<ConstantInt
>(CI
->getOperand(1)))
346 ICmpConstant
= Const
;
350 const APInt
&ICmpConst
= ICmpConstant
->getValue();
351 APInt OverflowConst
= cast
<ConstantInt
>(I
->getOperand(1))->getValue();
352 if (Opc
== Instruction::Sub
)
353 OverflowConst
= -OverflowConst
;
354 if (!OverflowConst
.isNonPositive())
357 // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
358 // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
359 // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
360 if (OverflowConst
.sgt(ICmpConst
)) {
361 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
362 << "const of " << *I
<< "\n");
366 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
367 << "const of " << *I
<< " and " << *CI
<< "\n");
375 bool TypePromotion::shouldPromote(Value
*V
) {
376 if (!isa
<IntegerType
>(V
->getType()) || isSink(V
))
382 auto *I
= dyn_cast
<Instruction
>(V
);
386 if (isa
<ICmpInst
>(I
))
392 /// Return whether we can safely mutate V's type to ExtTy without having to be
393 /// concerned with zero extending or truncation.
394 static bool isPromotedResultSafe(Instruction
*I
) {
395 if (GenerateSignBits(I
))
398 if (!isa
<OverflowingBinaryOperator
>(I
))
401 return I
->hasNoUnsignedWrap();
404 void IRPromoter::ReplaceAllUsersOfWith(Value
*From
, Value
*To
) {
405 SmallVector
<Instruction
*, 4> Users
;
406 Instruction
*InstTo
= dyn_cast
<Instruction
>(To
);
407 bool ReplacedAll
= true;
409 LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From
<< " with " << *To
412 for (Use
&U
: From
->uses()) {
413 auto *User
= cast
<Instruction
>(U
.getUser());
414 if (InstTo
&& User
->isIdenticalTo(InstTo
)) {
418 Users
.push_back(User
);
421 for (auto *U
: Users
)
422 U
->replaceUsesOfWith(From
, To
);
425 if (auto *I
= dyn_cast
<Instruction
>(From
))
426 InstsToRemove
.insert(I
);
429 void IRPromoter::ExtendSources() {
430 IRBuilder
<> Builder
{Ctx
};
432 auto InsertZExt
= [&](Value
*V
, Instruction
*InsertPt
) {
433 assert(V
->getType() != ExtTy
&& "zext already extends to i32");
434 LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V
<< "\n");
435 Builder
.SetInsertPoint(InsertPt
);
436 if (auto *I
= dyn_cast
<Instruction
>(V
))
437 Builder
.SetCurrentDebugLocation(I
->getDebugLoc());
439 Value
*ZExt
= Builder
.CreateZExt(V
, ExtTy
);
440 if (auto *I
= dyn_cast
<Instruction
>(ZExt
)) {
441 if (isa
<Argument
>(V
))
442 I
->moveBefore(InsertPt
);
444 I
->moveAfter(InsertPt
);
448 ReplaceAllUsersOfWith(V
, ZExt
);
451 // Now, insert extending instructions between the sources and their users.
452 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
453 for (auto *V
: Sources
) {
454 LLVM_DEBUG(dbgs() << " - " << *V
<< "\n");
455 if (auto *I
= dyn_cast
<Instruction
>(V
))
457 else if (auto *Arg
= dyn_cast
<Argument
>(V
)) {
458 BasicBlock
&BB
= Arg
->getParent()->front();
459 InsertZExt(Arg
, &*BB
.getFirstInsertionPt());
461 llvm_unreachable("unhandled source that needs extending");
467 void IRPromoter::PromoteTree() {
468 LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
470 // Mutate the types of the instructions within the tree. Here we handle
471 // constant operands.
472 for (auto *V
: Visited
) {
473 if (Sources
.count(V
))
476 auto *I
= cast
<Instruction
>(V
);
480 for (unsigned i
= 0, e
= I
->getNumOperands(); i
< e
; ++i
) {
481 Value
*Op
= I
->getOperand(i
);
482 if ((Op
->getType() == ExtTy
) || !isa
<IntegerType
>(Op
->getType()))
485 if (auto *Const
= dyn_cast
<ConstantInt
>(Op
)) {
486 // For subtract, we don't need to sext the constant. We only put it in
487 // SafeWrap because SafeWrap.size() is used elsewhere.
488 // For cmp, we need to sign extend a constant appearing in either
489 // operand. For add, we should only sign extend the RHS.
490 Constant
*NewConst
= (SafeWrap
.contains(I
) &&
491 (I
->getOpcode() == Instruction::ICmp
|| i
== 1) &&
492 I
->getOpcode() != Instruction::Sub
)
493 ? ConstantExpr::getSExt(Const
, ExtTy
)
494 : ConstantExpr::getZExt(Const
, ExtTy
);
495 I
->setOperand(i
, NewConst
);
496 } else if (isa
<UndefValue
>(Op
))
497 I
->setOperand(i
, ConstantInt::get(ExtTy
, 0));
500 // Mutate the result type, unless this is an icmp or switch.
501 if (!isa
<ICmpInst
>(I
) && !isa
<SwitchInst
>(I
)) {
502 I
->mutateType(ExtTy
);
508 void IRPromoter::TruncateSinks() {
509 LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
511 IRBuilder
<> Builder
{Ctx
};
513 auto InsertTrunc
= [&](Value
*V
, Type
*TruncTy
) -> Instruction
* {
514 if (!isa
<Instruction
>(V
) || !isa
<IntegerType
>(V
->getType()))
517 if ((!Promoted
.count(V
) && !NewInsts
.count(V
)) || Sources
.count(V
))
520 LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy
<< " Trunc for "
522 Builder
.SetInsertPoint(cast
<Instruction
>(V
));
523 auto *Trunc
= dyn_cast
<Instruction
>(Builder
.CreateTrunc(V
, TruncTy
));
525 NewInsts
.insert(Trunc
);
529 // Fix up any stores or returns that use the results of the promoted
531 for (auto *I
: Sinks
) {
532 LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I
<< "\n");
534 // Handle calls separately as we need to iterate over arg operands.
535 if (auto *Call
= dyn_cast
<CallInst
>(I
)) {
536 for (unsigned i
= 0; i
< Call
->arg_size(); ++i
) {
537 Value
*Arg
= Call
->getArgOperand(i
);
538 Type
*Ty
= TruncTysMap
[Call
][i
];
539 if (Instruction
*Trunc
= InsertTrunc(Arg
, Ty
)) {
540 Trunc
->moveBefore(Call
);
541 Call
->setArgOperand(i
, Trunc
);
547 // Special case switches because we need to truncate the condition.
548 if (auto *Switch
= dyn_cast
<SwitchInst
>(I
)) {
549 Type
*Ty
= TruncTysMap
[Switch
][0];
550 if (Instruction
*Trunc
= InsertTrunc(Switch
->getCondition(), Ty
)) {
551 Trunc
->moveBefore(Switch
);
552 Switch
->setCondition(Trunc
);
557 // Don't insert a trunc for a zext which can still legally promote.
558 // Nor insert a trunc when the input value to that trunc has the same width
559 // as the zext we are inserting it for. When this happens the input operand
560 // for the zext will be promoted to the same width as the zext's return type
561 // rendering that zext unnecessary. This zext gets removed before the end
563 if (auto ZExt
= dyn_cast
<ZExtInst
>(I
))
564 if (ZExt
->getType()->getScalarSizeInBits() >= PromotedWidth
)
567 // Now handle the others.
568 for (unsigned i
= 0; i
< I
->getNumOperands(); ++i
) {
569 Type
*Ty
= TruncTysMap
[I
][i
];
570 if (Instruction
*Trunc
= InsertTrunc(I
->getOperand(i
), Ty
)) {
571 Trunc
->moveBefore(I
);
572 I
->setOperand(i
, Trunc
);
578 void IRPromoter::Cleanup() {
579 LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
580 // Some zexts will now have become redundant, along with their trunc
581 // operands, so remove them
582 for (auto *V
: Visited
) {
583 if (!isa
<ZExtInst
>(V
))
586 auto ZExt
= cast
<ZExtInst
>(V
);
587 if (ZExt
->getDestTy() != ExtTy
)
590 Value
*Src
= ZExt
->getOperand(0);
591 if (ZExt
->getSrcTy() == ZExt
->getDestTy()) {
592 LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
594 ReplaceAllUsersOfWith(ZExt
, Src
);
598 // We've inserted a trunc for a zext sink, but we already know that the
599 // input is in range, negating the need for the trunc.
600 if (NewInsts
.count(Src
) && isa
<TruncInst
>(Src
)) {
601 auto *Trunc
= cast
<TruncInst
>(Src
);
602 assert(Trunc
->getOperand(0)->getType() == ExtTy
&&
603 "expected inserted trunc to be operating on i32");
604 ReplaceAllUsersOfWith(ZExt
, Trunc
->getOperand(0));
608 for (auto *I
: InstsToRemove
) {
609 LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I
<< "\n");
610 I
->dropAllReferences();
614 void IRPromoter::ConvertTruncs() {
615 LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
616 IRBuilder
<> Builder
{Ctx
};
618 for (auto *V
: Visited
) {
619 if (!isa
<TruncInst
>(V
) || Sources
.count(V
))
622 auto *Trunc
= cast
<TruncInst
>(V
);
623 Builder
.SetInsertPoint(Trunc
);
624 IntegerType
*SrcTy
= cast
<IntegerType
>(Trunc
->getOperand(0)->getType());
625 IntegerType
*DestTy
= cast
<IntegerType
>(TruncTysMap
[Trunc
][0]);
627 unsigned NumBits
= DestTy
->getScalarSizeInBits();
629 ConstantInt::get(SrcTy
, APInt::getMaxValue(NumBits
).getZExtValue());
630 Value
*Masked
= Builder
.CreateAnd(Trunc
->getOperand(0), Mask
);
632 if (auto *I
= dyn_cast
<Instruction
>(Masked
))
635 ReplaceAllUsersOfWith(Trunc
, Masked
);
639 void IRPromoter::Mutate() {
640 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
641 << PromotedWidth
<< "-bits\n");
643 // Cache original types of the values that will likely need truncating
644 for (auto *I
: Sinks
) {
645 if (auto *Call
= dyn_cast
<CallInst
>(I
)) {
646 for (Value
*Arg
: Call
->args())
647 TruncTysMap
[Call
].push_back(Arg
->getType());
648 } else if (auto *Switch
= dyn_cast
<SwitchInst
>(I
))
649 TruncTysMap
[I
].push_back(Switch
->getCondition()->getType());
651 for (unsigned i
= 0; i
< I
->getNumOperands(); ++i
)
652 TruncTysMap
[I
].push_back(I
->getOperand(i
)->getType());
655 for (auto *V
: Visited
) {
656 if (!isa
<TruncInst
>(V
) || Sources
.count(V
))
658 auto *Trunc
= cast
<TruncInst
>(V
);
659 TruncTysMap
[Trunc
].push_back(Trunc
->getDestTy());
662 // Insert zext instructions between sources and their users.
665 // Promote visited instructions, mutating their types in place.
668 // Convert any truncs, that aren't sources, into AND masks.
671 // Insert trunc instructions for use by calls, stores etc...
674 // Finally, remove unecessary zexts and truncs, delete old instructions and
675 // clear the data structures.
678 LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
681 /// We disallow booleans to make life easier when dealing with icmps but allow
682 /// any other integer that fits in a scalar register. Void types are accepted
683 /// so we can handle switches.
684 bool TypePromotion::isSupportedType(Value
*V
) {
685 Type
*Ty
= V
->getType();
687 // Allow voids and pointers, these won't be promoted.
688 if (Ty
->isVoidTy() || Ty
->isPointerTy())
691 if (!isa
<IntegerType
>(Ty
) || cast
<IntegerType
>(Ty
)->getBitWidth() == 1 ||
692 cast
<IntegerType
>(Ty
)->getBitWidth() > RegisterBitWidth
)
695 return LessOrEqualTypeSize(V
);
698 /// We accept most instructions, as well as Arguments and ConstantInsts. We
699 /// Disallow casts other than zext and truncs and only allow calls if their
700 /// return value is zeroext. We don't allow opcodes that can introduce sign
702 bool TypePromotion::isSupportedValue(Value
*V
) {
703 if (auto *I
= dyn_cast
<Instruction
>(V
)) {
704 switch (I
->getOpcode()) {
706 return isa
<BinaryOperator
>(I
) && isSupportedType(I
) &&
707 !GenerateSignBits(I
);
708 case Instruction::GetElementPtr
:
709 case Instruction::Store
:
710 case Instruction::Br
:
711 case Instruction::Switch
:
713 case Instruction::PHI
:
714 case Instruction::Select
:
715 case Instruction::Ret
:
716 case Instruction::Load
:
717 case Instruction::Trunc
:
718 case Instruction::BitCast
:
719 return isSupportedType(I
);
720 case Instruction::ZExt
:
721 return isSupportedType(I
->getOperand(0));
722 case Instruction::ICmp
:
723 // Now that we allow small types than TypeSize, only allow icmp of
724 // TypeSize because they will require a trunc to be legalised.
725 // TODO: Allow icmp of smaller types, and calculate at the end
726 // whether the transform would be beneficial.
727 if (isa
<PointerType
>(I
->getOperand(0)->getType()))
729 return EqualTypeSize(I
->getOperand(0));
730 case Instruction::Call
: {
731 // Special cases for calls as we need to check for zeroext
732 // TODO We should accept calls even if they don't have zeroext, as they
733 // can still be sinks.
734 auto *Call
= cast
<CallInst
>(I
);
735 return isSupportedType(Call
) &&
736 Call
->hasRetAttr(Attribute::AttrKind::ZExt
);
739 } else if (isa
<Constant
>(V
) && !isa
<ConstantExpr
>(V
)) {
740 return isSupportedType(V
);
741 } else if (isa
<Argument
>(V
))
742 return isSupportedType(V
);
744 return isa
<BasicBlock
>(V
);
747 /// Check that the type of V would be promoted and that the original type is
748 /// smaller than the targeted promoted type. Check that we're not trying to
749 /// promote something larger than our base 'TypeSize' type.
750 bool TypePromotion::isLegalToPromote(Value
*V
) {
751 auto *I
= dyn_cast
<Instruction
>(V
);
755 if (SafeToPromote
.count(I
))
758 if (isPromotedResultSafe(I
) || isSafeWrap(I
)) {
759 SafeToPromote
.insert(I
);
765 bool TypePromotion::TryToPromote(Value
*V
, unsigned PromotedWidth
) {
766 Type
*OrigTy
= V
->getType();
767 TypeSize
= OrigTy
->getPrimitiveSizeInBits().getFixedSize();
768 SafeToPromote
.clear();
771 if (!isSupportedValue(V
) || !shouldPromote(V
) || !isLegalToPromote(V
))
774 LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V
<< ", from "
775 << TypeSize
<< " bits to " << PromotedWidth
<< "\n");
777 SetVector
<Value
*> WorkList
;
778 SetVector
<Value
*> Sources
;
779 SetVector
<Instruction
*> Sinks
;
780 SetVector
<Value
*> CurrentVisited
;
783 // Return true if V was added to the worklist as a supported instruction,
784 // if it was already visited, or if we don't need to explore it (e.g.
785 // pointer values and GEPs), and false otherwise.
786 auto AddLegalInst
= [&](Value
*V
) {
787 if (CurrentVisited
.count(V
))
790 // Ignore GEPs because they don't need promoting and the constant indices
791 // will prevent the transformation.
792 if (isa
<GetElementPtrInst
>(V
))
795 if (!isSupportedValue(V
) || (shouldPromote(V
) && !isLegalToPromote(V
))) {
796 LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V
<< "\n");
804 // Iterate through, and add to, a tree of operands and users in the use-def.
805 while (!WorkList
.empty()) {
806 Value
*V
= WorkList
.pop_back_val();
807 if (CurrentVisited
.count(V
))
810 // Ignore non-instructions, other than arguments.
811 if (!isa
<Instruction
>(V
) && !isSource(V
))
814 // If we've already visited this value from somewhere, bail now because
815 // the tree has already been explored.
816 // TODO: This could limit the transform, ie if we try to promote something
817 // from an i8 and fail first, before trying an i16.
818 if (AllVisited
.count(V
))
821 CurrentVisited
.insert(V
);
822 AllVisited
.insert(V
);
824 // Calls can be both sources and sinks.
826 Sinks
.insert(cast
<Instruction
>(V
));
831 if (!isSink(V
) && !isSource(V
)) {
832 if (auto *I
= dyn_cast
<Instruction
>(V
)) {
833 // Visit operands of any instruction visited.
834 for (auto &U
: I
->operands()) {
835 if (!AddLegalInst(U
))
841 // Don't visit users of a node which isn't going to be mutated unless its a
843 if (isSource(V
) || shouldPromote(V
)) {
844 for (Use
&U
: V
->uses()) {
845 if (!AddLegalInst(U
.getUser()))
852 dbgs() << "IR Promotion: Visited nodes:\n";
853 for (auto *I
: CurrentVisited
)
857 unsigned ToPromote
= 0;
858 unsigned NonFreeArgs
= 0;
859 SmallPtrSet
<BasicBlock
*, 4> Blocks
;
860 for (auto *V
: CurrentVisited
) {
861 if (auto *I
= dyn_cast
<Instruction
>(V
))
862 Blocks
.insert(I
->getParent());
864 if (Sources
.count(V
)) {
865 if (auto *Arg
= dyn_cast
<Argument
>(V
))
866 if (!Arg
->hasZExtAttr() && !Arg
->hasSExtAttr())
871 if (Sinks
.count(cast
<Instruction
>(V
)))
876 // DAG optimizations should be able to handle these cases better, especially
877 // for function arguments.
878 if (!isa
<PHINode
>(V
) && (ToPromote
< 2 || (Blocks
.size() == 1 &&
879 (NonFreeArgs
> SafeWrap
.size()))))
882 IRPromoter
Promoter(*Ctx
, PromotedWidth
, CurrentVisited
, Sources
, Sinks
,
883 SafeWrap
, InstsToRemove
);
888 bool TypePromotion::runOnFunction(Function
&F
) {
889 if (skipFunction(F
) || DisablePromotion
)
892 LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F
.getName() << "\n");
894 auto *TPC
= getAnalysisIfAvailable
<TargetPassConfig
>();
899 SafeToPromote
.clear();
901 bool MadeChange
= false;
902 const DataLayout
&DL
= F
.getParent()->getDataLayout();
903 const TargetMachine
&TM
= TPC
->getTM
<TargetMachine
>();
904 const TargetSubtargetInfo
*SubtargetInfo
= TM
.getSubtargetImpl(F
);
905 const TargetLowering
*TLI
= SubtargetInfo
->getTargetLowering();
906 const TargetTransformInfo
&TII
=
907 getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
908 const LoopInfo
&LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
910 TII
.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar
).getFixedSize();
911 Ctx
= &F
.getParent()->getContext();
913 // Return the preferred integer width of the instruction, or zero if we
915 auto GetPromoteWidth
= [&](Instruction
*I
) -> uint32_t {
916 if (!isa
<IntegerType
>(I
->getType()))
919 EVT SrcVT
= TLI
->getValueType(DL
, I
->getType());
920 if (SrcVT
.isSimple() && TLI
->isTypeLegal(SrcVT
.getSimpleVT()))
923 if (TLI
->getTypeAction(*Ctx
, SrcVT
) != TargetLowering::TypePromoteInteger
)
926 EVT PromotedVT
= TLI
->getTypeToTransformTo(*Ctx
, SrcVT
);
927 if (RegisterBitWidth
< PromotedVT
.getFixedSizeInBits()) {
928 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
929 << "for promoted type\n");
933 // TODO: Should we prefer to use RegisterBitWidth instead?
934 return PromotedVT
.getFixedSizeInBits();
937 auto BBIsInLoop
= [&](BasicBlock
*BB
) -> bool {
944 for (BasicBlock
&BB
: F
) {
945 for (Instruction
&I
: BB
) {
946 if (AllVisited
.count(&I
))
949 if (isa
<ZExtInst
>(&I
) && isa
<PHINode
>(I
.getOperand(0)) &&
950 isa
<IntegerType
>(I
.getType()) && BBIsInLoop(&BB
)) {
951 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << I
.getOperand(0)
953 EVT ZExtVT
= TLI
->getValueType(DL
, I
.getType());
954 Instruction
*Phi
= static_cast<Instruction
*>(I
.getOperand(0));
955 auto PromoteWidth
= ZExtVT
.getFixedSizeInBits();
956 if (RegisterBitWidth
< PromoteWidth
) {
957 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target "
958 << "register for ZExt type\n");
961 MadeChange
|= TryToPromote(Phi
, PromoteWidth
);
962 } else if (auto *ICmp
= dyn_cast
<ICmpInst
>(&I
)) {
963 // Search up from icmps to try to promote their operands.
964 // Skip signed or pointer compares
965 if (ICmp
->isSigned())
968 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp
<< "\n");
970 for (auto &Op
: ICmp
->operands()) {
971 if (auto *OpI
= dyn_cast
<Instruction
>(Op
)) {
972 if (auto PromotedWidth
= GetPromoteWidth(OpI
)) {
973 MadeChange
|= TryToPromote(OpI
, PromotedWidth
);
980 if (!InstsToRemove
.empty()) {
981 for (auto *I
: InstsToRemove
)
982 I
->eraseFromParent();
983 InstsToRemove
.clear();
988 SafeToPromote
.clear();
994 INITIALIZE_PASS_BEGIN(TypePromotion
, DEBUG_TYPE
, PASS_NAME
, false, false)
995 INITIALIZE_PASS_END(TypePromotion
, DEBUG_TYPE
, PASS_NAME
, false, false)
997 char TypePromotion::ID
= 0;
999 FunctionPass
*llvm::createTypePromotionPass() { return new TypePromotion(); }