From a9dba968259e1bf3c400d2578cda0def65800923 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 27 Apr 2009 20:16:15 +0000 Subject: [PATCH] Teach getZeroExtendExpr and getSignExtendExpr to use trip-count information to simplify [sz]ext({a,+,b}) to {zext(a),+,[zs]ext(b)}, as appropriate. These functions and the trip count code each call into the other, so this requires careful handling to avoid infinite recursion. During the initial trip count computation, conservative SCEVs are used, which are subsequently discarded once the trip count is actually known. Among other benefits, this change lets LSR automatically eliminate some unnecessary zext-inreg and sext-inreg operation where the operand is an induction variable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@70241 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 142 +++++++- test/CodeGen/X86/masked-iv-safe.ll | 244 +++++++++++++ test/CodeGen/X86/masked-iv-unsafe.ll | 386 +++++++++++++++++++++ .../change-compare-stride-trickiness-1.ll | 10 +- 4 files changed, 769 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/X86/masked-iv-safe.ll create mode 100644 test/CodeGen/X86/masked-iv-unsafe.ll diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index a9521bb581..63ad2970f4 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -701,17 +701,81 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, if (SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty); - // FIXME: If the input value is a chrec scev, and we can prove that the value + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the - // operands (often constants). This would allow analysis of something like + // operands (often constants). This allows analysis of something like // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } + if (SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop()); + if (!isa(BECount)) { + // Compute the extent of AR and divide it by the step value. This is + // used to determine if it's safe to extend the stride value. + SCEVHandle Start = AR->getStart(); + SCEVHandle Step = AR->getStepRecurrence(*this); + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + SCEVHandle CastedBECount = + getTruncateOrZeroExtend(BECount, Start->getType()); + if (BECount == + getTruncateOrZeroExtend(CastedBECount, BECount->getType())) { + const Type *WideTy = + IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + SCEVHandle ZMul = + getMulExpr(CastedBECount, + getTruncateOrZeroExtend(Step, Start->getType())); + // Check whether Start+Step*BECount has no unsigned overflow. + if (getZeroExtendExpr(ZMul, WideTy) == + getMulExpr(getZeroExtendExpr(CastedBECount, WideTy), + getZeroExtendExpr(Step, WideTy))) { + SCEVHandle Add = getAddExpr(Start, ZMul); + if (getZeroExtendExpr(Add, WideTy) == + getAddExpr(getZeroExtendExpr(Start, WideTy), + getZeroExtendExpr(ZMul, WideTy))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + AR->getLoop()); + } + + // Similar to above, only this time treat the step value as signed. + // This covers loops that count down. + SCEVHandle SMul = + getMulExpr(CastedBECount, + getTruncateOrSignExtend(Step, Start->getType())); + // Check whether Start+Step*BECount has no unsigned overflow. + if (getSignExtendExpr(SMul, WideTy) == + getMulExpr(getZeroExtendExpr(CastedBECount, WideTy), + getSignExtendExpr(Step, WideTy))) { + SCEVHandle Add = getAddExpr(Start, SMul); + if (getZeroExtendExpr(Add, WideTy) == + getAddExpr(getZeroExtendExpr(Start, WideTy), + getSignExtendExpr(SMul, WideTy))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + AR->getLoop()); + } + } + } + } SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)]; if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty); return Result; } -SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, const Type *Ty) { +SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); @@ -726,10 +790,54 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, const Type * if (SCEVSignExtendExpr *SS = dyn_cast(Op)) return getSignExtendExpr(SS->getOperand(), Ty); - // FIXME: If the input value is a chrec scev, and we can prove that the value + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the - // operands (often constants). This would allow analysis of something like + // operands (often constants). This allows analysis of something like // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } + if (SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop()); + if (!isa(BECount)) { + // Compute the extent of AR and divide it by the step value. This is + // used to determine if it's safe to extend the stride value. + SCEVHandle Start = AR->getStart(); + SCEVHandle Step = AR->getStepRecurrence(*this); + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + SCEVHandle CastedBECount = + getTruncateOrZeroExtend(BECount, Start->getType()); + if (BECount == + getTruncateOrZeroExtend(CastedBECount, BECount->getType())) { + const Type *WideTy = + IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + SCEVHandle SMul = + getMulExpr(CastedBECount, + getTruncateOrSignExtend(Step, Start->getType())); + // Check whether Start+Step*BECount has no signed overflow. + if (getSignExtendExpr(SMul, WideTy) == + getMulExpr(getSignExtendExpr(CastedBECount, WideTy), + getSignExtendExpr(Step, WideTy))) { + SCEVHandle Add = getAddExpr(Start, SMul); + if (getSignExtendExpr(Add, WideTy) == + getAddExpr(getSignExtendExpr(Start, WideTy), + getSignExtendExpr(SMul, WideTy))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + AR->getLoop()); + } + } + } + } SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)]; if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty); @@ -1962,20 +2070,36 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { /// hasLoopInvariantBackedgeTakenCount). /// SCEVHandle ScalarEvolution::getBackedgeTakenCount(const Loop *L) { - std::map::iterator I = BackedgeTakenCounts.find(L); - if (I == BackedgeTakenCounts.end()) { + // Initially insert a CouldNotCompute for this loop. If the insertion + // succeeds, procede to actually compute a backedge-taken count and + // update the value. The temporary CouldNotCompute value tells SCEV + // code elsewhere that it shouldn't attempt to request a new + // backedge-taken count, which could result in infinite recursion. + std::pair::iterator, bool> Pair = + BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); + if (Pair.second) { SCEVHandle ItCount = ComputeBackedgeTakenCount(L); - I = BackedgeTakenCounts.insert(std::make_pair(L, ItCount)).first; if (ItCount != UnknownValue) { assert(ItCount->isLoopInvariant(L) && "Computed trip count isn't loop invariant for loop!"); ++NumTripCountsComputed; + + // Now that we know the trip count for this loop, forget any + // existing SCEV values for PHI nodes in this loop since they + // are only conservative estimates made without the benefit + // of trip count information. + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast(I); ++I) + deleteValueFromRecords(PN); + + // Update the value in the map. + Pair.first->second = ItCount; } else if (isa(L->getHeader()->begin())) { // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; } } - return I->second; + return Pair.first->second; } /// forgetLoopBackedgeTakenCount - This method should be called by the diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll new file mode 100644 index 0000000000..2ba3f830fd --- /dev/null +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -0,0 +1,244 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: not grep and %t +; RUN: not grep movz %t +; RUN: not grep sar %t +; RUN: not grep shl %t +; RUN: grep add %t | count 6 +; RUN: grep inc %t | count 4 +; RUN: grep dec %t | count 2 +; RUN: grep lea %t | count 2 + +; Optimize away zext-inreg and sext-inreg on the loop induction +; variable using trip-count information. + +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll new file mode 100644 index 0000000000..7ccfe855a6 --- /dev/null +++ b/test/CodeGen/X86/masked-iv-unsafe.ll @@ -0,0 +1,386 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep and %t | count 6 +; RUN: grep movzb %t | count 6 +; RUN: grep sar %t | count 12 + +; Don't optimize away zext-inreg and sext-inreg on the loop induction +; variable, because it isn't safe to do so in these cases. + +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @still_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + + + diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll index 06e231212e..cb2f3aa516 100644 --- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll +++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpl \$8} +; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpq \$8} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin9" @@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin9" ; happens after the relevant use, so the comparison stride can be ; easily changed. -define void @foo() { +define void @foo() nounwind { entry: br label %loop @@ -14,9 +14,11 @@ loop: %indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ] ; [#uses=1] %i.2.0.us1534 = add i32 %indvar, 1 ; [#uses=3] %tmp628.us1540 = shl i32 %i.2.0.us1534, 1 ; [#uses=1] - %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; [#uses=0] + %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64 ; [#uses=1] + store i64 %tmp645646647.us1547, i64* null %tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4 ; [#uses=2] - %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; [#uses=0] + %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0 ; [#uses=1] + store i32 %tmp623.us1538, i32* null br i1 %tmp611.us1535, label %exit, label %loop exit: -- 2.11.4.GIT