1 //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
/// Insert hardware loop intrinsics into loops which are deemed profitable by
/// the target, by querying TargetTransformInfo. A hardware loop comprises
/// two intrinsics: one, outside the loop, to set the loop iteration count and
/// another, in the exit block, to decrement the counter. The decremented value
/// can either be carried through the loop via a phi or handled in some opaque
/// way by the target.
16 //===----------------------------------------------------------------------===//
18 #include "llvm/CodeGen/HardwareLoops.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/AssumptionCache.h"
21 #include "llvm/Analysis/BranchProbabilityInfo.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
24 #include "llvm/Analysis/ScalarEvolution.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Pass.h"
37 #include "llvm/PassRegistry.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Transforms/Utils.h"
41 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #include "llvm/Transforms/Utils/Local.h"
43 #include "llvm/Transforms/Utils/LoopUtils.h"
44 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
46 #define DEBUG_TYPE "hardware-loops"
48 #define HW_LOOPS_NAME "Hardware Loop Insertion"
53 ForceHardwareLoops("force-hardware-loops", cl::Hidden
, cl::init(false),
54 cl::desc("Force hardware loops intrinsics to be inserted"));
58 "force-hardware-loop-phi", cl::Hidden
, cl::init(false),
59 cl::desc("Force hardware loop counter to be updated through a phi"));
62 ForceNestedLoop("force-nested-hardware-loop", cl::Hidden
, cl::init(false),
63 cl::desc("Force allowance of nested hardware loops"));
65 static cl::opt
<unsigned>
66 LoopDecrement("hardware-loop-decrement", cl::Hidden
, cl::init(1),
67 cl::desc("Set the loop decrement value"));
69 static cl::opt
<unsigned>
70 CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden
, cl::init(32),
71 cl::desc("Set the loop counter bitwidth"));
75 "force-hardware-loop-guard", cl::Hidden
, cl::init(false),
76 cl::desc("Force generation of loop guard intrinsic"));
78 STATISTIC(NumHWLoops
, "Number of loops converted to hardware loops");
81 static void debugHWLoopFailure(const StringRef DebugMsg
,
83 dbgs() << "HWLoops: " << DebugMsg
;
92 static OptimizationRemarkAnalysis
93 createHWLoopAnalysis(StringRef RemarkName
, Loop
*L
, Instruction
*I
) {
94 Value
*CodeRegion
= L
->getHeader();
95 DebugLoc DL
= L
->getStartLoc();
98 CodeRegion
= I
->getParent();
99 // If there is no debug location attached to the instruction, revert back to
101 if (I
->getDebugLoc())
102 DL
= I
->getDebugLoc();
105 OptimizationRemarkAnalysis
R(DEBUG_TYPE
, RemarkName
, DL
, CodeRegion
);
106 R
<< "hardware-loop not created: ";
112 void reportHWLoopFailure(const StringRef Msg
, const StringRef ORETag
,
113 OptimizationRemarkEmitter
*ORE
, Loop
*TheLoop
, Instruction
*I
= nullptr) {
114 LLVM_DEBUG(debugHWLoopFailure(Msg
, I
));
115 ORE
->emit(createHWLoopAnalysis(ORETag
, TheLoop
, I
) << Msg
);
118 using TTI
= TargetTransformInfo
;
120 class HardwareLoopsLegacy
: public FunctionPass
{
124 HardwareLoopsLegacy() : FunctionPass(ID
) {
125 initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
128 bool runOnFunction(Function
&F
) override
;
130 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
131 AU
.addRequired
<LoopInfoWrapperPass
>();
132 AU
.addPreserved
<LoopInfoWrapperPass
>();
133 AU
.addRequired
<DominatorTreeWrapperPass
>();
134 AU
.addPreserved
<DominatorTreeWrapperPass
>();
135 AU
.addRequired
<ScalarEvolutionWrapperPass
>();
136 AU
.addPreserved
<ScalarEvolutionWrapperPass
>();
137 AU
.addRequired
<AssumptionCacheTracker
>();
138 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
139 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
140 AU
.addPreserved
<BranchProbabilityInfoWrapperPass
>();
144 class HardwareLoopsImpl
{
146 HardwareLoopsImpl(ScalarEvolution
&SE
, LoopInfo
&LI
, bool PreserveLCSSA
,
147 DominatorTree
&DT
, const DataLayout
&DL
,
148 const TargetTransformInfo
&TTI
, TargetLibraryInfo
*TLI
,
149 AssumptionCache
&AC
, OptimizationRemarkEmitter
*ORE
,
150 HardwareLoopOptions
&Opts
)
151 : SE(SE
), LI(LI
), PreserveLCSSA(PreserveLCSSA
), DT(DT
), DL(DL
), TTI(TTI
),
152 TLI(TLI
), AC(AC
), ORE(ORE
), Opts(Opts
) { }
154 bool run(Function
&F
);
157 // Try to convert the given Loop into a hardware loop.
158 bool TryConvertLoop(Loop
*L
, LLVMContext
&Ctx
);
160 // Given that the target believes the loop to be profitable, try to
162 bool TryConvertLoop(HardwareLoopInfo
&HWLoopInfo
);
168 const DataLayout
&DL
;
169 const TargetTransformInfo
&TTI
;
170 TargetLibraryInfo
*TLI
= nullptr;
172 OptimizationRemarkEmitter
*ORE
;
173 HardwareLoopOptions
&Opts
;
174 bool MadeChange
= false;
178 // Expand the trip count scev into a value that we can use.
179 Value
*InitLoopCount();
181 // Insert the set_loop_iteration intrinsic.
182 Value
*InsertIterationSetup(Value
*LoopCountInit
);
184 // Insert the loop_decrement intrinsic.
185 void InsertLoopDec();
187 // Insert the loop_decrement_reg intrinsic.
188 Instruction
*InsertLoopRegDec(Value
*EltsRem
);
190 // If the target requires the counter value to be updated in the loop,
191 // insert a phi to hold the value. The intended purpose is for use by
192 // loop_decrement_reg.
193 PHINode
*InsertPHICounter(Value
*NumElts
, Value
*EltsRem
);
195 // Create a new cmp, that checks the returned value of loop_decrement*,
196 // and update the exit branch to use it.
197 void UpdateBranch(Value
*EltsRem
);
200 HardwareLoop(HardwareLoopInfo
&Info
, ScalarEvolution
&SE
,
201 const DataLayout
&DL
,
202 OptimizationRemarkEmitter
*ORE
,
203 HardwareLoopOptions
&Opts
) :
204 SE(SE
), DL(DL
), ORE(ORE
), Opts(Opts
), L(Info
.L
), M(L
->getHeader()->getModule()),
205 ExitCount(Info
.ExitCount
),
206 CountType(Info
.CountType
),
207 ExitBranch(Info
.ExitBranch
),
208 LoopDecrement(Info
.LoopDecrement
),
209 UsePHICounter(Info
.CounterInReg
),
210 UseLoopGuard(Info
.PerformEntryTest
) { }
216 const DataLayout
&DL
;
217 OptimizationRemarkEmitter
*ORE
= nullptr;
218 HardwareLoopOptions
&Opts
;
221 const SCEV
*ExitCount
= nullptr;
222 Type
*CountType
= nullptr;
223 BranchInst
*ExitBranch
= nullptr;
224 Value
*LoopDecrement
= nullptr;
225 bool UsePHICounter
= false;
226 bool UseLoopGuard
= false;
227 BasicBlock
*BeginBB
= nullptr;
231 char HardwareLoopsLegacy::ID
= 0;
233 bool HardwareLoopsLegacy::runOnFunction(Function
&F
) {
237 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F
.getName() << "\n");
239 auto &LI
= getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
240 auto &SE
= getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
241 auto &DT
= getAnalysis
<DominatorTreeWrapperPass
>().getDomTree();
242 auto &TTI
= getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
);
243 auto &DL
= F
.getDataLayout();
244 auto *ORE
= &getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE();
245 auto *TLIP
= getAnalysisIfAvailable
<TargetLibraryInfoWrapperPass
>();
246 auto *TLI
= TLIP
? &TLIP
->getTLI(F
) : nullptr;
247 auto &AC
= getAnalysis
<AssumptionCacheTracker
>().getAssumptionCache(F
);
248 bool PreserveLCSSA
= mustPreserveAnalysisID(LCSSAID
);
250 HardwareLoopOptions Opts
;
251 if (ForceHardwareLoops
.getNumOccurrences())
252 Opts
.setForce(ForceHardwareLoops
);
253 if (ForceHardwareLoopPHI
.getNumOccurrences())
254 Opts
.setForcePhi(ForceHardwareLoopPHI
);
255 if (ForceNestedLoop
.getNumOccurrences())
256 Opts
.setForceNested(ForceNestedLoop
);
257 if (ForceGuardLoopEntry
.getNumOccurrences())
258 Opts
.setForceGuard(ForceGuardLoopEntry
);
259 if (LoopDecrement
.getNumOccurrences())
260 Opts
.setDecrement(LoopDecrement
);
261 if (CounterBitWidth
.getNumOccurrences())
262 Opts
.setCounterBitwidth(CounterBitWidth
);
264 HardwareLoopsImpl
Impl(SE
, LI
, PreserveLCSSA
, DT
, DL
, TTI
, TLI
, AC
, ORE
,
269 PreservedAnalyses
HardwareLoopsPass::run(Function
&F
,
270 FunctionAnalysisManager
&AM
) {
271 auto &LI
= AM
.getResult
<LoopAnalysis
>(F
);
272 auto &SE
= AM
.getResult
<ScalarEvolutionAnalysis
>(F
);
273 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
274 auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
275 auto *TLI
= &AM
.getResult
<TargetLibraryAnalysis
>(F
);
276 auto &AC
= AM
.getResult
<AssumptionAnalysis
>(F
);
277 auto *ORE
= &AM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
278 auto &DL
= F
.getDataLayout();
280 HardwareLoopsImpl
Impl(SE
, LI
, true, DT
, DL
, TTI
, TLI
, AC
, ORE
, Opts
);
281 bool Changed
= Impl
.run(F
);
283 return PreservedAnalyses::all();
285 PreservedAnalyses PA
;
286 PA
.preserve
<LoopAnalysis
>();
287 PA
.preserve
<ScalarEvolutionAnalysis
>();
288 PA
.preserve
<DominatorTreeAnalysis
>();
289 PA
.preserve
<BranchProbabilityAnalysis
>();
293 bool HardwareLoopsImpl::run(Function
&F
) {
294 LLVMContext
&Ctx
= F
.getContext();
296 if (L
->isOutermost())
297 TryConvertLoop(L
, Ctx
);
301 // Return true if the search should stop, which will be when an inner loop is
302 // converted and the parent loop doesn't support containing a hardware loop.
303 bool HardwareLoopsImpl::TryConvertLoop(Loop
*L
, LLVMContext
&Ctx
) {
304 // Process nested loops first.
305 bool AnyChanged
= false;
307 AnyChanged
|= TryConvertLoop(SL
, Ctx
);
309 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
311 return true; // Stop search.
314 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L
->getHeader()->getName() << "\n");
316 HardwareLoopInfo
HWLoopInfo(L
);
317 if (!HWLoopInfo
.canAnalyze(LI
)) {
318 reportHWLoopFailure("cannot analyze loop, irreducible control flow",
319 "HWLoopCannotAnalyze", ORE
, L
);
324 !TTI
.isHardwareLoopProfitable(L
, SE
, AC
, TLI
, HWLoopInfo
)) {
325 reportHWLoopFailure("it's not profitable to create a hardware-loop",
326 "HWLoopNotProfitable", ORE
, L
);
330 // Allow overriding of the counter width and loop decrement value.
331 if (Opts
.Bitwidth
.has_value()) {
332 HWLoopInfo
.CountType
= IntegerType::get(Ctx
, Opts
.Bitwidth
.value());
335 if (Opts
.Decrement
.has_value())
336 HWLoopInfo
.LoopDecrement
=
337 ConstantInt::get(HWLoopInfo
.CountType
, Opts
.Decrement
.value());
339 MadeChange
|= TryConvertLoop(HWLoopInfo
);
340 return MadeChange
&& (!HWLoopInfo
.IsNestingLegal
&& !Opts
.ForceNested
);
343 bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo
&HWLoopInfo
) {
345 Loop
*L
= HWLoopInfo
.L
;
346 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L
);
348 if (!HWLoopInfo
.isHardwareLoopCandidate(SE
, LI
, DT
, Opts
.getForceNested(),
349 Opts
.getForcePhi())) {
350 // TODO: there can be many reasons a loop is not considered a
351 // candidate, so we should let isHardwareLoopCandidate fill in the
352 // reason and then report a better message here.
353 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE
, L
);
358 (HWLoopInfo
.ExitBlock
&& HWLoopInfo
.ExitBranch
&& HWLoopInfo
.ExitCount
) &&
359 "Hardware Loop must have set exit info.");
361 BasicBlock
*Preheader
= L
->getLoopPreheader();
363 // If we don't have a preheader, then insert one.
365 Preheader
= InsertPreheaderForLoop(L
, &DT
, &LI
, nullptr, PreserveLCSSA
);
369 HardwareLoop
HWLoop(HWLoopInfo
, SE
, DL
, ORE
, Opts
);
375 void HardwareLoop::Create() {
376 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
378 Value
*LoopCountInit
= InitLoopCount();
379 if (!LoopCountInit
) {
380 reportHWLoopFailure("could not safely create a loop count expression",
381 "HWLoopNotSafe", ORE
, L
);
385 Value
*Setup
= InsertIterationSetup(LoopCountInit
);
387 if (UsePHICounter
|| Opts
.ForcePhi
) {
388 Instruction
*LoopDec
= InsertLoopRegDec(LoopCountInit
);
389 Value
*EltsRem
= InsertPHICounter(Setup
, LoopDec
);
390 LoopDec
->setOperand(0, EltsRem
);
391 UpdateBranch(LoopDec
);
395 // Run through the basic blocks of the loop and see if any of them have dead
396 // PHIs that can be removed.
397 for (auto *I
: L
->blocks())
401 static bool CanGenerateTest(Loop
*L
, Value
*Count
) {
402 BasicBlock
*Preheader
= L
->getLoopPreheader();
403 if (!Preheader
->getSinglePredecessor())
406 BasicBlock
*Pred
= Preheader
->getSinglePredecessor();
407 if (!isa
<BranchInst
>(Pred
->getTerminator()))
410 auto *BI
= cast
<BranchInst
>(Pred
->getTerminator());
411 if (BI
->isUnconditional() || !isa
<ICmpInst
>(BI
->getCondition()))
414 // Check that the icmp is checking for equality of Count and zero and that
415 // a non-zero value results in entering the loop.
416 auto ICmp
= cast
<ICmpInst
>(BI
->getCondition());
417 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp
<< "\n");
418 if (!ICmp
->isEquality())
421 auto IsCompareZero
= [](ICmpInst
*ICmp
, Value
*Count
, unsigned OpIdx
) {
422 if (auto *Const
= dyn_cast
<ConstantInt
>(ICmp
->getOperand(OpIdx
)))
423 return Const
->isZero() && ICmp
->getOperand(OpIdx
^ 1) == Count
;
427 // Check if Count is a zext.
428 Value
*CountBefZext
=
429 isa
<ZExtInst
>(Count
) ? cast
<ZExtInst
>(Count
)->getOperand(0) : nullptr;
431 if (!IsCompareZero(ICmp
, Count
, 0) && !IsCompareZero(ICmp
, Count
, 1) &&
432 !IsCompareZero(ICmp
, CountBefZext
, 0) &&
433 !IsCompareZero(ICmp
, CountBefZext
, 1))
436 unsigned SuccIdx
= ICmp
->getPredicate() == ICmpInst::ICMP_NE
? 0 : 1;
437 if (BI
->getSuccessor(SuccIdx
) != Preheader
)
443 Value
*HardwareLoop::InitLoopCount() {
444 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
445 // Can we replace a conditional branch with an intrinsic that sets the
446 // loop counter and tests that is not zero?
448 SCEVExpander
SCEVE(SE
, DL
, "loopcnt");
449 if (!ExitCount
->getType()->isPointerTy() &&
450 ExitCount
->getType() != CountType
)
451 ExitCount
= SE
.getZeroExtendExpr(ExitCount
, CountType
);
453 ExitCount
= SE
.getAddExpr(ExitCount
, SE
.getOne(CountType
));
455 // If we're trying to use the 'test and set' form of the intrinsic, we need
456 // to replace a conditional branch that is controlling entry to the loop. It
457 // is likely (guaranteed?) that the preheader has an unconditional branch to
458 // the loop header, so also check if it has a single predecessor.
459 if (SE
.isLoopEntryGuardedByCond(L
, ICmpInst::ICMP_NE
, ExitCount
,
460 SE
.getZero(ExitCount
->getType()))) {
461 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
465 UseLoopGuard
= false;
467 BasicBlock
*BB
= L
->getLoopPreheader();
468 if (UseLoopGuard
&& BB
->getSinglePredecessor() &&
469 cast
<BranchInst
>(BB
->getTerminator())->isUnconditional()) {
470 BasicBlock
*Predecessor
= BB
->getSinglePredecessor();
471 // If it's not safe to create a while loop then don't force it and create a
472 // do-while loop instead
473 if (!SCEVE
.isSafeToExpandAt(ExitCount
, Predecessor
->getTerminator()))
474 UseLoopGuard
= false;
479 if (!SCEVE
.isSafeToExpandAt(ExitCount
, BB
->getTerminator())) {
480 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
481 << *ExitCount
<< "\n");
485 Value
*Count
= SCEVE
.expandCodeFor(ExitCount
, CountType
,
486 BB
->getTerminator());
488 // FIXME: We've expanded Count where we hope to insert the counter setting
489 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
490 // the just 'set' form and in which case the insertion block is most likely
491 // different. It means there will be instruction(s) in a block that possibly
492 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
493 // but it's doesn't appear to work in all cases.
495 UseLoopGuard
= UseLoopGuard
&& CanGenerateTest(L
, Count
);
496 BeginBB
= UseLoopGuard
? BB
: L
->getLoopPreheader();
497 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count
<< "\n"
498 << " - Expanded Count in " << BB
->getName() << "\n"
499 << " - Will insert set counter intrinsic into: "
500 << BeginBB
->getName() << "\n");
504 Value
* HardwareLoop::InsertIterationSetup(Value
*LoopCountInit
) {
505 IRBuilder
<> Builder(BeginBB
->getTerminator());
506 if (BeginBB
->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP
))
507 Builder
.setIsFPConstrained(true);
508 Type
*Ty
= LoopCountInit
->getType();
509 bool UsePhi
= UsePHICounter
|| Opts
.ForcePhi
;
510 Intrinsic::ID ID
= UseLoopGuard
511 ? (UsePhi
? Intrinsic::test_start_loop_iterations
512 : Intrinsic::test_set_loop_iterations
)
513 : (UsePhi
? Intrinsic::start_loop_iterations
514 : Intrinsic::set_loop_iterations
);
515 Function
*LoopIter
= Intrinsic::getDeclaration(M
, ID
, Ty
);
516 Value
*LoopSetup
= Builder
.CreateCall(LoopIter
, LoopCountInit
);
518 // Use the return value of the intrinsic to control the entry of the loop.
520 assert((isa
<BranchInst
>(BeginBB
->getTerminator()) &&
521 cast
<BranchInst
>(BeginBB
->getTerminator())->isConditional()) &&
522 "Expected conditional branch");
525 UsePhi
? Builder
.CreateExtractValue(LoopSetup
, 1) : LoopSetup
;
526 auto *LoopGuard
= cast
<BranchInst
>(BeginBB
->getTerminator());
527 LoopGuard
->setCondition(SetCount
);
528 if (LoopGuard
->getSuccessor(0) != L
->getLoopPreheader())
529 LoopGuard
->swapSuccessors();
531 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
533 if (UsePhi
&& UseLoopGuard
)
534 LoopSetup
= Builder
.CreateExtractValue(LoopSetup
, 0);
535 return !UsePhi
? LoopCountInit
: LoopSetup
;
538 void HardwareLoop::InsertLoopDec() {
539 IRBuilder
<> CondBuilder(ExitBranch
);
540 if (ExitBranch
->getParent()->getParent()->getAttributes().hasFnAttr(
541 Attribute::StrictFP
))
542 CondBuilder
.setIsFPConstrained(true);
545 Intrinsic::getDeclaration(M
, Intrinsic::loop_decrement
,
546 LoopDecrement
->getType());
547 Value
*Ops
[] = { LoopDecrement
};
548 Value
*NewCond
= CondBuilder
.CreateCall(DecFunc
, Ops
);
549 Value
*OldCond
= ExitBranch
->getCondition();
550 ExitBranch
->setCondition(NewCond
);
552 // The false branch must exit the loop.
553 if (!L
->contains(ExitBranch
->getSuccessor(0)))
554 ExitBranch
->swapSuccessors();
556 // The old condition may be dead now, and may have even created a dead PHI
557 // (the original induction variable).
558 RecursivelyDeleteTriviallyDeadInstructions(OldCond
);
560 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond
<< "\n");
563 Instruction
* HardwareLoop::InsertLoopRegDec(Value
*EltsRem
) {
564 IRBuilder
<> CondBuilder(ExitBranch
);
565 if (ExitBranch
->getParent()->getParent()->getAttributes().hasFnAttr(
566 Attribute::StrictFP
))
567 CondBuilder
.setIsFPConstrained(true);
570 Intrinsic::getDeclaration(M
, Intrinsic::loop_decrement_reg
,
571 { EltsRem
->getType() });
572 Value
*Ops
[] = { EltsRem
, LoopDecrement
};
573 Value
*Call
= CondBuilder
.CreateCall(DecFunc
, Ops
);
575 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call
<< "\n");
576 return cast
<Instruction
>(Call
);
579 PHINode
* HardwareLoop::InsertPHICounter(Value
*NumElts
, Value
*EltsRem
) {
580 BasicBlock
*Preheader
= L
->getLoopPreheader();
581 BasicBlock
*Header
= L
->getHeader();
582 BasicBlock
*Latch
= ExitBranch
->getParent();
583 IRBuilder
<> Builder(Header
, Header
->getFirstNonPHIIt());
584 PHINode
*Index
= Builder
.CreatePHI(NumElts
->getType(), 2);
585 Index
->addIncoming(NumElts
, Preheader
);
586 Index
->addIncoming(EltsRem
, Latch
);
587 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index
<< "\n");
591 void HardwareLoop::UpdateBranch(Value
*EltsRem
) {
592 IRBuilder
<> CondBuilder(ExitBranch
);
594 CondBuilder
.CreateICmpNE(EltsRem
, ConstantInt::get(EltsRem
->getType(), 0));
595 Value
*OldCond
= ExitBranch
->getCondition();
596 ExitBranch
->setCondition(NewCond
);
598 // The false branch must exit the loop.
599 if (!L
->contains(ExitBranch
->getSuccessor(0)))
600 ExitBranch
->swapSuccessors();
602 // The old condition may be dead now, and may have even created a dead PHI
603 // (the original induction variable).
604 RecursivelyDeleteTriviallyDeadInstructions(OldCond
);
607 INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy
, DEBUG_TYPE
, HW_LOOPS_NAME
, false, false)
608 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass
)
609 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
610 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
)
611 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass
)
612 INITIALIZE_PASS_END(HardwareLoopsLegacy
, DEBUG_TYPE
, HW_LOOPS_NAME
, false, false)
614 FunctionPass
*llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }