[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / llvm / lib / CodeGen / TypePromotion.cpp
blob5ed2674fe6533808cdef2621c9b373df38265c85
1 //===----- TypePromotion.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This is an opcode based type promotion pass for small types that would
11 /// otherwise be promoted during legalisation. This works around the limitations
12 /// of selection dag for cyclic regions. The search begins from icmp
13 /// instructions operands where a tree, consisting of non-wrapping or safe
14 /// wrapping instructions, is built, checked and promoted if possible.
15 ///
16 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstrTypes.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Target/TargetMachine.h"
41 #define DEBUG_TYPE "type-promotion"
42 #define PASS_NAME "Type Promotion"
44 using namespace llvm;
46 static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden,
47 cl::init(false),
48 cl::desc("Disable type promotion pass"));
50 // The goal of this pass is to enable more efficient code generation for
51 // operations on narrow types (i.e. types with < 32-bits) and this is a
52 // motivating IR code example:
54 // define hidden i32 @cmp(i8 zeroext) {
55 // %2 = add i8 %0, -49
56 // %3 = icmp ult i8 %2, 3
57 // ..
58 // }
60 // The issue here is that i8 is type-legalized to i32 because i8 is not a
61 // legal type. Thus, arithmetic is done in integer-precision, but then the
62 // byte value is masked out as follows:
64 // t19: i32 = add t4, Constant:i32<-49>
65 // t24: i32 = and t19, Constant:i32<255>
67 // Consequently, we generate code like this:
69 // subs r0, #49
70 // uxtb r1, r0
71 // cmp r1, #3
73 // This shows that masking out the byte value results in generation of
74 // the UXTB instruction. This is not optimal as r0 already contains the byte
75 // value we need, and so instead we can just generate:
77 // sub.w r1, r0, #49
78 // cmp r1, #3
80 // We achieve this by type promoting the IR to i32 like so for this example:
82 // define i32 @cmp(i8 zeroext %c) {
83 // %0 = zext i8 %c to i32
84 // %c.off = add i32 %0, -49
85 // %1 = icmp ult i32 %c.off, 3
86 // ..
87 // }
89 // For this to be valid and legal, we need to prove that the i32 add is
90 // producing the same value as the i8 addition, and that e.g. no overflow
91 // happens.
93 // A brief sketch of the algorithm and some terminology.
94 // We pattern match interesting IR patterns:
95 // - which have "sources": instructions producing narrow values (i8, i16), and
96 // - they have "sinks": instructions consuming these narrow values.
98 // We collect all instruction connecting sources and sinks in a worklist, so
99 // that we can mutate these instruction and perform type promotion when it is
100 // legal to do so.
102 namespace {
103 class IRPromoter {
104 LLVMContext &Ctx;
105 unsigned PromotedWidth = 0;
106 SetVector<Value *> &Visited;
107 SetVector<Value *> &Sources;
108 SetVector<Instruction *> &Sinks;
109 SmallPtrSetImpl<Instruction *> &SafeWrap;
110 SmallPtrSetImpl<Instruction *> &InstsToRemove;
111 IntegerType *ExtTy = nullptr;
112 SmallPtrSet<Value *, 8> NewInsts;
113 DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap;
114 SmallPtrSet<Value *, 8> Promoted;
116 void ReplaceAllUsersOfWith(Value *From, Value *To);
117 void ExtendSources();
118 void ConvertTruncs();
119 void PromoteTree();
120 void TruncateSinks();
121 void Cleanup();
123 public:
124 IRPromoter(LLVMContext &C, unsigned Width, SetVector<Value *> &visited,
125 SetVector<Value *> &sources, SetVector<Instruction *> &sinks,
126 SmallPtrSetImpl<Instruction *> &wrap,
127 SmallPtrSetImpl<Instruction *> &instsToRemove)
128 : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources),
129 Sinks(sinks), SafeWrap(wrap), InstsToRemove(instsToRemove) {
130 ExtTy = IntegerType::get(Ctx, PromotedWidth);
133 void Mutate();
136 class TypePromotion : public FunctionPass {
137 unsigned TypeSize = 0;
138 LLVMContext *Ctx = nullptr;
139 unsigned RegisterBitWidth = 0;
140 SmallPtrSet<Value *, 16> AllVisited;
141 SmallPtrSet<Instruction *, 8> SafeToPromote;
142 SmallPtrSet<Instruction *, 4> SafeWrap;
143 SmallPtrSet<Instruction *, 4> InstsToRemove;
145 // Does V have the same size result type as TypeSize.
146 bool EqualTypeSize(Value *V);
147 // Does V have the same size, or narrower, result type as TypeSize.
148 bool LessOrEqualTypeSize(Value *V);
149 // Does V have a result type that is wider than TypeSize.
150 bool GreaterThanTypeSize(Value *V);
151 // Does V have a result type that is narrower than TypeSize.
152 bool LessThanTypeSize(Value *V);
153 // Should V be a leaf in the promote tree?
154 bool isSource(Value *V);
155 // Should V be a root in the promotion tree?
156 bool isSink(Value *V);
157 // Should we change the result type of V? It will result in the users of V
158 // being visited.
159 bool shouldPromote(Value *V);
160 // Is I an add or a sub, which isn't marked as nuw, but where a wrapping
161 // result won't affect the computation?
162 bool isSafeWrap(Instruction *I);
163 // Can V have its integer type promoted, or can the type be ignored.
164 bool isSupportedType(Value *V);
165 // Is V an instruction with a supported opcode or another value that we can
166 // handle, such as constants and basic blocks.
167 bool isSupportedValue(Value *V);
168 // Is V an instruction thats result can trivially promoted, or has safe
169 // wrapping.
170 bool isLegalToPromote(Value *V);
171 bool TryToPromote(Value *V, unsigned PromotedWidth);
173 public:
174 static char ID;
176 TypePromotion() : FunctionPass(ID) {}
178 void getAnalysisUsage(AnalysisUsage &AU) const override {
179 AU.addRequired<LoopInfoWrapperPass>();
180 AU.addRequired<TargetTransformInfoWrapperPass>();
181 AU.addRequired<TargetPassConfig>();
182 AU.setPreservesCFG();
183 AU.addPreserved<LoopInfoWrapperPass>();
186 StringRef getPassName() const override { return PASS_NAME; }
188 bool runOnFunction(Function &F) override;
191 } // namespace
193 static bool GenerateSignBits(Instruction *I) {
194 unsigned Opc = I->getOpcode();
195 return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
196 Opc == Instruction::SRem || Opc == Instruction::SExt;
199 bool TypePromotion::EqualTypeSize(Value *V) {
200 return V->getType()->getScalarSizeInBits() == TypeSize;
203 bool TypePromotion::LessOrEqualTypeSize(Value *V) {
204 return V->getType()->getScalarSizeInBits() <= TypeSize;
207 bool TypePromotion::GreaterThanTypeSize(Value *V) {
208 return V->getType()->getScalarSizeInBits() > TypeSize;
211 bool TypePromotion::LessThanTypeSize(Value *V) {
212 return V->getType()->getScalarSizeInBits() < TypeSize;
215 /// Return true if the given value is a source in the use-def chain, producing
216 /// a narrow 'TypeSize' value. These values will be zext to start the promotion
217 /// of the tree to i32. We guarantee that these won't populate the upper bits
218 /// of the register. ZExt on the loads will be free, and the same for call
219 /// return values because we only accept ones that guarantee a zeroext ret val.
220 /// Many arguments will have the zeroext attribute too, so those would be free
221 /// too.
222 bool TypePromotion::isSource(Value *V) {
223 if (!isa<IntegerType>(V->getType()))
224 return false;
226 // TODO Allow zext to be sources.
227 if (isa<Argument>(V))
228 return true;
229 else if (isa<LoadInst>(V))
230 return true;
231 else if (isa<BitCastInst>(V))
232 return true;
233 else if (auto *Call = dyn_cast<CallInst>(V))
234 return Call->hasRetAttr(Attribute::AttrKind::ZExt);
235 else if (auto *Trunc = dyn_cast<TruncInst>(V))
236 return EqualTypeSize(Trunc);
237 return false;
240 /// Return true if V will require any promoted values to be truncated for the
241 /// the IR to remain valid. We can't mutate the value type of these
242 /// instructions.
243 bool TypePromotion::isSink(Value *V) {
244 // TODO The truncate also isn't actually necessary because we would already
245 // proved that the data value is kept within the range of the original data
246 // type. We currently remove any truncs inserted for handling zext sinks.
248 // Sinks are:
249 // - points where the value in the register is being observed, such as an
250 // icmp, switch or store.
251 // - points where value types have to match, such as calls and returns.
252 // - zext are included to ease the transformation and are generally removed
253 // later on.
254 if (auto *Store = dyn_cast<StoreInst>(V))
255 return LessOrEqualTypeSize(Store->getValueOperand());
256 if (auto *Return = dyn_cast<ReturnInst>(V))
257 return LessOrEqualTypeSize(Return->getReturnValue());
258 if (auto *ZExt = dyn_cast<ZExtInst>(V))
259 return GreaterThanTypeSize(ZExt);
260 if (auto *Switch = dyn_cast<SwitchInst>(V))
261 return LessThanTypeSize(Switch->getCondition());
262 if (auto *ICmp = dyn_cast<ICmpInst>(V))
263 return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
265 return isa<CallInst>(V);
268 /// Return whether this instruction can safely wrap.
269 bool TypePromotion::isSafeWrap(Instruction *I) {
270 // We can support a potentially wrapping instruction (I) if:
271 // - It is only used by an unsigned icmp.
272 // - The icmp uses a constant.
273 // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
274 // around zero to become a larger number than before.
275 // - The wrapping instruction (I) also uses a constant.
277 // We can then use the two constants to calculate whether the result would
278 // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
279 // just underflows the range, the icmp would give the same result whether the
280 // result has been truncated or not. We calculate this by:
281 // - Zero extending both constants, if needed, to RegisterBitWidth.
282 // - Take the absolute value of I's constant, adding this to the icmp const.
283 // - Check that this value is not out of range for small type. If it is, it
284 // means that it has underflowed enough to wrap around the icmp constant.
286 // For example:
288 // %sub = sub i8 %a, 2
289 // %cmp = icmp ule i8 %sub, 254
291 // If %a = 0, %sub = -2 == FE == 254
292 // But if this is evalulated as a i32
293 // %sub = -2 == FF FF FF FE == 4294967294
294 // So the unsigned compares (i8 and i32) would not yield the same result.
296 // Another way to look at it is:
297 // %a - 2 <= 254
298 // %a + 2 <= 254 + 2
299 // %a <= 256
300 // And we can't represent 256 in the i8 format, so we don't support it.
302 // Whereas:
304 // %sub i8 %a, 1
305 // %cmp = icmp ule i8 %sub, 254
307 // If %a = 0, %sub = -1 == FF == 255
308 // As i32:
309 // %sub = -1 == FF FF FF FF == 4294967295
311 // In this case, the unsigned compare results would be the same and this
312 // would also be true for ult, uge and ugt:
313 // - (255 < 254) == (0xFFFFFFFF < 254) == false
314 // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
315 // - (255 > 254) == (0xFFFFFFFF > 254) == true
316 // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
318 // To demonstrate why we can't handle increasing values:
320 // %add = add i8 %a, 2
321 // %cmp = icmp ult i8 %add, 127
323 // If %a = 254, %add = 256 == (i8 1)
324 // As i32:
325 // %add = 256
327 // (1 < 127) != (256 < 127)
329 unsigned Opc = I->getOpcode();
330 if (Opc != Instruction::Add && Opc != Instruction::Sub)
331 return false;
333 if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
334 !isa<ConstantInt>(I->getOperand(1)))
335 return false;
337 // Don't support an icmp that deals with sign bits.
338 auto *CI = cast<ICmpInst>(*I->user_begin());
339 if (CI->isSigned() || CI->isEquality())
340 return false;
342 ConstantInt *ICmpConstant = nullptr;
343 if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
344 ICmpConstant = Const;
345 else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
346 ICmpConstant = Const;
347 else
348 return false;
350 const APInt &ICmpConst = ICmpConstant->getValue();
351 APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
352 if (Opc == Instruction::Sub)
353 OverflowConst = -OverflowConst;
354 if (!OverflowConst.isNonPositive())
355 return false;
357 // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
358 // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
359 // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
360 if (OverflowConst.sgt(ICmpConst)) {
361 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
362 << "const of " << *I << "\n");
363 SafeWrap.insert(I);
364 return true;
365 } else {
366 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
367 << "const of " << *I << " and " << *CI << "\n");
368 SafeWrap.insert(I);
369 SafeWrap.insert(CI);
370 return true;
372 return false;
375 bool TypePromotion::shouldPromote(Value *V) {
376 if (!isa<IntegerType>(V->getType()) || isSink(V))
377 return false;
379 if (isSource(V))
380 return true;
382 auto *I = dyn_cast<Instruction>(V);
383 if (!I)
384 return false;
386 if (isa<ICmpInst>(I))
387 return false;
389 return true;
392 /// Return whether we can safely mutate V's type to ExtTy without having to be
393 /// concerned with zero extending or truncation.
394 static bool isPromotedResultSafe(Instruction *I) {
395 if (GenerateSignBits(I))
396 return false;
398 if (!isa<OverflowingBinaryOperator>(I))
399 return true;
401 return I->hasNoUnsignedWrap();
404 void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
405 SmallVector<Instruction *, 4> Users;
406 Instruction *InstTo = dyn_cast<Instruction>(To);
407 bool ReplacedAll = true;
409 LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
410 << "\n");
412 for (Use &U : From->uses()) {
413 auto *User = cast<Instruction>(U.getUser());
414 if (InstTo && User->isIdenticalTo(InstTo)) {
415 ReplacedAll = false;
416 continue;
418 Users.push_back(User);
421 for (auto *U : Users)
422 U->replaceUsesOfWith(From, To);
424 if (ReplacedAll)
425 if (auto *I = dyn_cast<Instruction>(From))
426 InstsToRemove.insert(I);
429 void IRPromoter::ExtendSources() {
430 IRBuilder<> Builder{Ctx};
432 auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
433 assert(V->getType() != ExtTy && "zext already extends to i32");
434 LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
435 Builder.SetInsertPoint(InsertPt);
436 if (auto *I = dyn_cast<Instruction>(V))
437 Builder.SetCurrentDebugLocation(I->getDebugLoc());
439 Value *ZExt = Builder.CreateZExt(V, ExtTy);
440 if (auto *I = dyn_cast<Instruction>(ZExt)) {
441 if (isa<Argument>(V))
442 I->moveBefore(InsertPt);
443 else
444 I->moveAfter(InsertPt);
445 NewInsts.insert(I);
448 ReplaceAllUsersOfWith(V, ZExt);
451 // Now, insert extending instructions between the sources and their users.
452 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
453 for (auto *V : Sources) {
454 LLVM_DEBUG(dbgs() << " - " << *V << "\n");
455 if (auto *I = dyn_cast<Instruction>(V))
456 InsertZExt(I, I);
457 else if (auto *Arg = dyn_cast<Argument>(V)) {
458 BasicBlock &BB = Arg->getParent()->front();
459 InsertZExt(Arg, &*BB.getFirstInsertionPt());
460 } else {
461 llvm_unreachable("unhandled source that needs extending");
463 Promoted.insert(V);
467 void IRPromoter::PromoteTree() {
468 LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
470 // Mutate the types of the instructions within the tree. Here we handle
471 // constant operands.
472 for (auto *V : Visited) {
473 if (Sources.count(V))
474 continue;
476 auto *I = cast<Instruction>(V);
477 if (Sinks.count(I))
478 continue;
480 for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
481 Value *Op = I->getOperand(i);
482 if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
483 continue;
485 if (auto *Const = dyn_cast<ConstantInt>(Op)) {
486 // For subtract, we don't need to sext the constant. We only put it in
487 // SafeWrap because SafeWrap.size() is used elsewhere.
488 // For cmp, we need to sign extend a constant appearing in either
489 // operand. For add, we should only sign extend the RHS.
490 Constant *NewConst = (SafeWrap.contains(I) &&
491 (I->getOpcode() == Instruction::ICmp || i == 1) &&
492 I->getOpcode() != Instruction::Sub)
493 ? ConstantExpr::getSExt(Const, ExtTy)
494 : ConstantExpr::getZExt(Const, ExtTy);
495 I->setOperand(i, NewConst);
496 } else if (isa<UndefValue>(Op))
497 I->setOperand(i, ConstantInt::get(ExtTy, 0));
500 // Mutate the result type, unless this is an icmp or switch.
501 if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
502 I->mutateType(ExtTy);
503 Promoted.insert(I);
508 void IRPromoter::TruncateSinks() {
509 LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
511 IRBuilder<> Builder{Ctx};
513 auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * {
514 if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
515 return nullptr;
517 if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V))
518 return nullptr;
520 LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
521 << *V << "\n");
522 Builder.SetInsertPoint(cast<Instruction>(V));
523 auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
524 if (Trunc)
525 NewInsts.insert(Trunc);
526 return Trunc;
529 // Fix up any stores or returns that use the results of the promoted
530 // chain.
531 for (auto *I : Sinks) {
532 LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
534 // Handle calls separately as we need to iterate over arg operands.
535 if (auto *Call = dyn_cast<CallInst>(I)) {
536 for (unsigned i = 0; i < Call->arg_size(); ++i) {
537 Value *Arg = Call->getArgOperand(i);
538 Type *Ty = TruncTysMap[Call][i];
539 if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
540 Trunc->moveBefore(Call);
541 Call->setArgOperand(i, Trunc);
544 continue;
547 // Special case switches because we need to truncate the condition.
548 if (auto *Switch = dyn_cast<SwitchInst>(I)) {
549 Type *Ty = TruncTysMap[Switch][0];
550 if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
551 Trunc->moveBefore(Switch);
552 Switch->setCondition(Trunc);
554 continue;
557 // Don't insert a trunc for a zext which can still legally promote.
558 // Nor insert a trunc when the input value to that trunc has the same width
559 // as the zext we are inserting it for. When this happens the input operand
560 // for the zext will be promoted to the same width as the zext's return type
561 // rendering that zext unnecessary. This zext gets removed before the end
562 // of the pass.
563 if (auto ZExt = dyn_cast<ZExtInst>(I))
564 if (ZExt->getType()->getScalarSizeInBits() >= PromotedWidth)
565 continue;
567 // Now handle the others.
568 for (unsigned i = 0; i < I->getNumOperands(); ++i) {
569 Type *Ty = TruncTysMap[I][i];
570 if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
571 Trunc->moveBefore(I);
572 I->setOperand(i, Trunc);
578 void IRPromoter::Cleanup() {
579 LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
580 // Some zexts will now have become redundant, along with their trunc
581 // operands, so remove them
582 for (auto *V : Visited) {
583 if (!isa<ZExtInst>(V))
584 continue;
586 auto ZExt = cast<ZExtInst>(V);
587 if (ZExt->getDestTy() != ExtTy)
588 continue;
590 Value *Src = ZExt->getOperand(0);
591 if (ZExt->getSrcTy() == ZExt->getDestTy()) {
592 LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
593 << "\n");
594 ReplaceAllUsersOfWith(ZExt, Src);
595 continue;
598 // We've inserted a trunc for a zext sink, but we already know that the
599 // input is in range, negating the need for the trunc.
600 if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
601 auto *Trunc = cast<TruncInst>(Src);
602 assert(Trunc->getOperand(0)->getType() == ExtTy &&
603 "expected inserted trunc to be operating on i32");
604 ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
608 for (auto *I : InstsToRemove) {
609 LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
610 I->dropAllReferences();
614 void IRPromoter::ConvertTruncs() {
615 LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
616 IRBuilder<> Builder{Ctx};
618 for (auto *V : Visited) {
619 if (!isa<TruncInst>(V) || Sources.count(V))
620 continue;
622 auto *Trunc = cast<TruncInst>(V);
623 Builder.SetInsertPoint(Trunc);
624 IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
625 IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
627 unsigned NumBits = DestTy->getScalarSizeInBits();
628 ConstantInt *Mask =
629 ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
630 Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
632 if (auto *I = dyn_cast<Instruction>(Masked))
633 NewInsts.insert(I);
635 ReplaceAllUsersOfWith(Trunc, Masked);
639 void IRPromoter::Mutate() {
640 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
641 << PromotedWidth << "-bits\n");
643 // Cache original types of the values that will likely need truncating
644 for (auto *I : Sinks) {
645 if (auto *Call = dyn_cast<CallInst>(I)) {
646 for (Value *Arg : Call->args())
647 TruncTysMap[Call].push_back(Arg->getType());
648 } else if (auto *Switch = dyn_cast<SwitchInst>(I))
649 TruncTysMap[I].push_back(Switch->getCondition()->getType());
650 else {
651 for (unsigned i = 0; i < I->getNumOperands(); ++i)
652 TruncTysMap[I].push_back(I->getOperand(i)->getType());
655 for (auto *V : Visited) {
656 if (!isa<TruncInst>(V) || Sources.count(V))
657 continue;
658 auto *Trunc = cast<TruncInst>(V);
659 TruncTysMap[Trunc].push_back(Trunc->getDestTy());
662 // Insert zext instructions between sources and their users.
663 ExtendSources();
665 // Promote visited instructions, mutating their types in place.
666 PromoteTree();
668 // Convert any truncs, that aren't sources, into AND masks.
669 ConvertTruncs();
671 // Insert trunc instructions for use by calls, stores etc...
672 TruncateSinks();
674 // Finally, remove unecessary zexts and truncs, delete old instructions and
675 // clear the data structures.
676 Cleanup();
678 LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
681 /// We disallow booleans to make life easier when dealing with icmps but allow
682 /// any other integer that fits in a scalar register. Void types are accepted
683 /// so we can handle switches.
684 bool TypePromotion::isSupportedType(Value *V) {
685 Type *Ty = V->getType();
687 // Allow voids and pointers, these won't be promoted.
688 if (Ty->isVoidTy() || Ty->isPointerTy())
689 return true;
691 if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 ||
692 cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
693 return false;
695 return LessOrEqualTypeSize(V);
698 /// We accept most instructions, as well as Arguments and ConstantInsts. We
699 /// Disallow casts other than zext and truncs and only allow calls if their
700 /// return value is zeroext. We don't allow opcodes that can introduce sign
701 /// bits.
702 bool TypePromotion::isSupportedValue(Value *V) {
703 if (auto *I = dyn_cast<Instruction>(V)) {
704 switch (I->getOpcode()) {
705 default:
706 return isa<BinaryOperator>(I) && isSupportedType(I) &&
707 !GenerateSignBits(I);
708 case Instruction::GetElementPtr:
709 case Instruction::Store:
710 case Instruction::Br:
711 case Instruction::Switch:
712 return true;
713 case Instruction::PHI:
714 case Instruction::Select:
715 case Instruction::Ret:
716 case Instruction::Load:
717 case Instruction::Trunc:
718 case Instruction::BitCast:
719 return isSupportedType(I);
720 case Instruction::ZExt:
721 return isSupportedType(I->getOperand(0));
722 case Instruction::ICmp:
723 // Now that we allow small types than TypeSize, only allow icmp of
724 // TypeSize because they will require a trunc to be legalised.
725 // TODO: Allow icmp of smaller types, and calculate at the end
726 // whether the transform would be beneficial.
727 if (isa<PointerType>(I->getOperand(0)->getType()))
728 return true;
729 return EqualTypeSize(I->getOperand(0));
730 case Instruction::Call: {
731 // Special cases for calls as we need to check for zeroext
732 // TODO We should accept calls even if they don't have zeroext, as they
733 // can still be sinks.
734 auto *Call = cast<CallInst>(I);
735 return isSupportedType(Call) &&
736 Call->hasRetAttr(Attribute::AttrKind::ZExt);
739 } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) {
740 return isSupportedType(V);
741 } else if (isa<Argument>(V))
742 return isSupportedType(V);
744 return isa<BasicBlock>(V);
747 /// Check that the type of V would be promoted and that the original type is
748 /// smaller than the targeted promoted type. Check that we're not trying to
749 /// promote something larger than our base 'TypeSize' type.
750 bool TypePromotion::isLegalToPromote(Value *V) {
751 auto *I = dyn_cast<Instruction>(V);
752 if (!I)
753 return true;
755 if (SafeToPromote.count(I))
756 return true;
758 if (isPromotedResultSafe(I) || isSafeWrap(I)) {
759 SafeToPromote.insert(I);
760 return true;
762 return false;
765 bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
766 Type *OrigTy = V->getType();
767 TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize();
768 SafeToPromote.clear();
769 SafeWrap.clear();
771 if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
772 return false;
774 LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
775 << TypeSize << " bits to " << PromotedWidth << "\n");
777 SetVector<Value *> WorkList;
778 SetVector<Value *> Sources;
779 SetVector<Instruction *> Sinks;
780 SetVector<Value *> CurrentVisited;
781 WorkList.insert(V);
783 // Return true if V was added to the worklist as a supported instruction,
784 // if it was already visited, or if we don't need to explore it (e.g.
785 // pointer values and GEPs), and false otherwise.
786 auto AddLegalInst = [&](Value *V) {
787 if (CurrentVisited.count(V))
788 return true;
790 // Ignore GEPs because they don't need promoting and the constant indices
791 // will prevent the transformation.
792 if (isa<GetElementPtrInst>(V))
793 return true;
795 if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
796 LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
797 return false;
800 WorkList.insert(V);
801 return true;
804 // Iterate through, and add to, a tree of operands and users in the use-def.
805 while (!WorkList.empty()) {
806 Value *V = WorkList.pop_back_val();
807 if (CurrentVisited.count(V))
808 continue;
810 // Ignore non-instructions, other than arguments.
811 if (!isa<Instruction>(V) && !isSource(V))
812 continue;
814 // If we've already visited this value from somewhere, bail now because
815 // the tree has already been explored.
816 // TODO: This could limit the transform, ie if we try to promote something
817 // from an i8 and fail first, before trying an i16.
818 if (AllVisited.count(V))
819 return false;
821 CurrentVisited.insert(V);
822 AllVisited.insert(V);
824 // Calls can be both sources and sinks.
825 if (isSink(V))
826 Sinks.insert(cast<Instruction>(V));
828 if (isSource(V))
829 Sources.insert(V);
831 if (!isSink(V) && !isSource(V)) {
832 if (auto *I = dyn_cast<Instruction>(V)) {
833 // Visit operands of any instruction visited.
834 for (auto &U : I->operands()) {
835 if (!AddLegalInst(U))
836 return false;
841 // Don't visit users of a node which isn't going to be mutated unless its a
842 // source.
843 if (isSource(V) || shouldPromote(V)) {
844 for (Use &U : V->uses()) {
845 if (!AddLegalInst(U.getUser()))
846 return false;
851 LLVM_DEBUG({
852 dbgs() << "IR Promotion: Visited nodes:\n";
853 for (auto *I : CurrentVisited)
854 I->dump();
857 unsigned ToPromote = 0;
858 unsigned NonFreeArgs = 0;
859 SmallPtrSet<BasicBlock *, 4> Blocks;
860 for (auto *V : CurrentVisited) {
861 if (auto *I = dyn_cast<Instruction>(V))
862 Blocks.insert(I->getParent());
864 if (Sources.count(V)) {
865 if (auto *Arg = dyn_cast<Argument>(V))
866 if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
867 ++NonFreeArgs;
868 continue;
871 if (Sinks.count(cast<Instruction>(V)))
872 continue;
873 ++ToPromote;
876 // DAG optimizations should be able to handle these cases better, especially
877 // for function arguments.
878 if (!isa<PHINode>(V) && (ToPromote < 2 || (Blocks.size() == 1 &&
879 (NonFreeArgs > SafeWrap.size()))))
880 return false;
882 IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
883 SafeWrap, InstsToRemove);
884 Promoter.Mutate();
885 return true;
888 bool TypePromotion::runOnFunction(Function &F) {
889 if (skipFunction(F) || DisablePromotion)
890 return false;
892 LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
894 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
895 if (!TPC)
896 return false;
898 AllVisited.clear();
899 SafeToPromote.clear();
900 SafeWrap.clear();
901 bool MadeChange = false;
902 const DataLayout &DL = F.getParent()->getDataLayout();
903 const TargetMachine &TM = TPC->getTM<TargetMachine>();
904 const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
905 const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
906 const TargetTransformInfo &TII =
907 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
908 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
909 RegisterBitWidth =
910 TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
911 Ctx = &F.getParent()->getContext();
913 // Return the preferred integer width of the instruction, or zero if we
914 // shouldn't try.
915 auto GetPromoteWidth = [&](Instruction *I) -> uint32_t {
916 if (!isa<IntegerType>(I->getType()))
917 return 0;
919 EVT SrcVT = TLI->getValueType(DL, I->getType());
920 if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
921 return 0;
923 if (TLI->getTypeAction(*Ctx, SrcVT) != TargetLowering::TypePromoteInteger)
924 return 0;
926 EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
927 if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
928 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
929 << "for promoted type\n");
930 return 0;
933 // TODO: Should we prefer to use RegisterBitWidth instead?
934 return PromotedVT.getFixedSizeInBits();
937 auto BBIsInLoop = [&](BasicBlock *BB) -> bool {
938 for (auto *L : LI)
939 if (L->contains(BB))
940 return true;
941 return false;
944 for (BasicBlock &BB : F) {
945 for (Instruction &I : BB) {
946 if (AllVisited.count(&I))
947 continue;
949 if (isa<ZExtInst>(&I) && isa<PHINode>(I.getOperand(0)) &&
950 isa<IntegerType>(I.getType()) && BBIsInLoop(&BB)) {
951 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << I.getOperand(0)
952 << "\n");
953 EVT ZExtVT = TLI->getValueType(DL, I.getType());
954 Instruction *Phi = static_cast<Instruction *>(I.getOperand(0));
955 auto PromoteWidth = ZExtVT.getFixedSizeInBits();
956 if (RegisterBitWidth < PromoteWidth) {
957 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target "
958 << "register for ZExt type\n");
959 continue;
961 MadeChange |= TryToPromote(Phi, PromoteWidth);
962 } else if (auto *ICmp = dyn_cast<ICmpInst>(&I)) {
963 // Search up from icmps to try to promote their operands.
964 // Skip signed or pointer compares
965 if (ICmp->isSigned())
966 continue;
968 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
970 for (auto &Op : ICmp->operands()) {
971 if (auto *OpI = dyn_cast<Instruction>(Op)) {
972 if (auto PromotedWidth = GetPromoteWidth(OpI)) {
973 MadeChange |= TryToPromote(OpI, PromotedWidth);
974 break;
980 if (!InstsToRemove.empty()) {
981 for (auto *I : InstsToRemove)
982 I->eraseFromParent();
983 InstsToRemove.clear();
987 AllVisited.clear();
988 SafeToPromote.clear();
989 SafeWrap.clear();
991 return MadeChange;
994 INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
995 INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
997 char TypePromotion::ID = 0;
999 FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); }