llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
   3 ; PR9939
   4
   5 ; LSR should properly handle the post-inc offset when folding the
   6 ; non-IV operand of an icmp into the IV.
   7
   8 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
   9
  10 %struct.Vector2 = type { i16*, [64 x i16], i32 }
  11
  12 @.str = private unnamed_addr constant [37 x i8] c"0123456789abcdefghijklmnopqrstuvwxyz\00"
  13
  14 define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2* nocapture %result) nounwind noinline {
  15 ; CHECK-LABEL: @_Z15IntegerToStringjjR7Vector2(
  16 ; CHECK-NEXT:  entry:
  17 ; CHECK-NEXT:    [[BUFFER:%.*]] = alloca [33 x i16], align 16
  18 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds [33 x i16], [33 x i16]* [[BUFFER]], i64 0, i64 33
  19 ; CHECK-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i16* [[ADD_PTR]] to i64
  20 ; CHECK-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i16* [[ADD_PTR]] to i64
  21 ; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr [33 x i16], [33 x i16]* [[BUFFER]], i64 0, i64 32
  22 ; CHECK-NEXT:    [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to [33 x i16]*
  23 ; CHECK-NEXT:    [[SCEVGEP11:%.*]] = getelementptr [33 x i16], [33 x i16]* [[BUFFER]], i64 1, i64 0
  24 ; CHECK-NEXT:    [[SCEVGEP1112:%.*]] = bitcast i16* [[SCEVGEP11]] to [33 x i16]*
  25 ; CHECK-NEXT:    br label [[DO_BODY:%.*]]
  26 ; CHECK:       do.body:
  27 ; CHECK-NEXT:    [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], [[DO_BODY]] ], [ -1, [[ENTRY:%.*]] ]
  28 ; CHECK-NEXT:    [[LSR_IV13:%.*]] = phi [33 x i16]* [ [[TMP2:%.*]], [[DO_BODY]] ], [ [[SCEVGEP1112]], [[ENTRY]] ]
  29 ; CHECK-NEXT:    [[LSR_IV6:%.*]] = phi [33 x i16]* [ [[TMP1:%.*]], [[DO_BODY]] ], [ [[SCEVGEP45]], [[ENTRY]] ]
  30 ; CHECK-NEXT:    [[I_ADDR_0:%.*]] = phi i32 [ [[DIV:%.*]], [[DO_BODY]] ], [ [[I:%.*]], [[ENTRY]] ]
  31 ; CHECK-NEXT:    [[LSR_IV617:%.*]] = bitcast [33 x i16]* [[LSR_IV6]] to i16*
  32 ; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[I_ADDR_0]], 10
  33 ; CHECK-NEXT:    [[DIV]] = udiv i32 [[I_ADDR_0]], 10
  34 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[REM]] to i64
  35 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [37 x i8], [37 x i8]* @.str, i64 0, i64 [[IDXPROM]]
  36 ; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
  37 ; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP5]] to i16
  38 ; CHECK-NEXT:    store i16 [[CONV]], i16* [[LSR_IV617]], align 2
  39 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i32 [[I_ADDR_0]], 9
  40 ; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr [33 x i16], [33 x i16]* [[LSR_IV6]], i64 0, i64 -1
  41 ; CHECK-NEXT:    [[TMP1]] = bitcast i16* [[SCEVGEP7]] to [33 x i16]*
  42 ; CHECK-NEXT:    [[SCEVGEP14:%.*]] = getelementptr [33 x i16], [33 x i16]* [[LSR_IV13]], i64 0, i64 -1
  43 ; CHECK-NEXT:    [[TMP2]] = bitcast i16* [[SCEVGEP14]] to [33 x i16]*
  44 ; CHECK-NEXT:    [[LSR_IV_NEXT16]] = add i64 [[LSR_IV15]], 1
  45 ; CHECK-NEXT:    br i1 [[TMP0]], label [[DO_BODY]], label [[DO_END:%.*]]
  46 ; CHECK:       do.end:
  47 ; CHECK-NEXT:    [[XAP_0:%.*]] = inttoptr i64 [[LSR_IV_NEXT16]] to i1*
  48 ; CHECK-NEXT:    [[CAP_0:%.*]] = ptrtoint i1* [[XAP_0]] to i64
  49 ; CHECK-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
  50 ; CHECK-NEXT:    [[SUB_PTR_DIV39:%.*]] = lshr exact i64 [[SUB_PTR_SUB]], 1
  51 ; CHECK-NEXT:    [[CONV11:%.*]] = trunc i64 [[SUB_PTR_DIV39]] to i32
  52 ; CHECK-NEXT:    [[MLENGTH:%.*]] = getelementptr inbounds [[STRUCT_VECTOR2:%.*]], %struct.Vector2* [[RESULT:%.*]], i64 0, i32 2
  53 ; CHECK-NEXT:    [[IDX_EXT21:%.*]] = bitcast i64 [[SUB_PTR_DIV39]] to i64
  54 ; CHECK-NEXT:    [[CMP2740:%.*]] = icmp eq i64 [[IDX_EXT21]], 0
  55 ; CHECK-NEXT:    br i1 [[CMP2740]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]]
  56 ; CHECK:       for.body.lr.ph:
  57 ; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[MLENGTH]], align 4
  58 ; CHECK-NEXT:    [[MBEGIN:%.*]] = getelementptr inbounds [[STRUCT_VECTOR2]], %struct.Vector2* [[RESULT]], i64 0, i32 0
  59 ; CHECK-NEXT:    [[TMP14:%.*]] = load i16*, i16** [[MBEGIN]], align 8
  60 ; CHECK-NEXT:    [[TMP48:%.*]] = zext i32 [[TMP16]] to i64
  61 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[TMP14]], i64 [[TMP48]]
  62 ; CHECK-NEXT:    [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
  63 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
  64 ; CHECK:       for.body:
  65 ; CHECK-NEXT:    [[LSR_IV8:%.*]] = phi [33 x i16]* [ [[TMP3:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[FOR_BODY_LR_PH]] ]
  66 ; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ]
  67 ; CHECK-NEXT:    [[LSR_IV810:%.*]] = bitcast [33 x i16]* [[LSR_IV8]] to i16*
  68 ; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP1]], i64 [[LSR_IV]]
  69 ; CHECK-NEXT:    [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
  70 ; CHECK-NEXT:    [[TMP29:%.*]] = load i16, i16* [[LSR_IV810]], align 2
  71 ; CHECK-NEXT:    store i16 [[TMP29]], i16* [[UGLYGEP2]], align 2
  72 ; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 2
  73 ; CHECK-NEXT:    [[LSR_IV_NEXT3:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i16*
  74 ; CHECK-NEXT:    [[SCEVGEP9:%.*]] = getelementptr [33 x i16], [33 x i16]* [[LSR_IV8]], i64 0, i64 1
  75 ; CHECK-NEXT:    [[TMP3]] = bitcast i16* [[SCEVGEP9]] to [33 x i16]*
  76 ; CHECK-NEXT:    [[CMP27:%.*]] = icmp eq i16* [[LSR_IV_NEXT3]], null
  77 ; CHECK-NEXT:    br i1 [[CMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
  78 ; CHECK:       for.end.loopexit:
  79 ; CHECK-NEXT:    br label [[FOR_END]]
  80 ; CHECK:       for.end:
  81 ; CHECK-NEXT:    [[TMP38:%.*]] = load i32, i32* [[MLENGTH]], align 4
  82 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP38]], [[CONV11]]
  83 ; CHECK-NEXT:    store i32 [[ADD]], i32* [[MLENGTH]], align 4
  84 ; CHECK-NEXT:    ret void
  85 ;
  86 entry:
  87   %buffer = alloca [33 x i16], align 16  ; stack scratch array of 33 i16 elements
  88   %add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33  ; address one past the last element of %buffer
  89   %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
  90   %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64  ; same operand as the lhs cast, so the later difference is of equal values
  91   br label %do.body
  92
  93 do.body:                                          ; preds = %do.body, %entry
  94   %0 = phi i64 [ %indvar.next44, %do.body ], [ 0, %entry ]  ; iteration counter of this loop, starts at 0; also used after the loop
  95   %i.addr.0 = phi i32 [ %div, %do.body ], [ %i, %entry ]  ; remaining value, divided by 10 on each trip
  96   %tmp51 = sub i64 32, %0  ; buffer index walks down from 32 as %0 counts up
  97   %incdec.ptr = getelementptr [33 x i16], [33 x i16]* %buffer, i64 0, i64 %tmp51  ; &buffer[32 - %0]
  98   %rem = urem i32 %i.addr.0, 10  ; divisor is the constant 10; %radix is not referenced in this body
  99   %div = udiv i32 %i.addr.0, 10
 100   %idxprom = zext i32 %rem to i64
 101   %arrayidx = getelementptr inbounds [37 x i8], [37 x i8]* @.str, i64 0, i64 %idxprom  ; index the character table @.str by the remainder
 102   %tmp5 = load i8, i8* %arrayidx, align 1
 103   %conv = sext i8 %tmp5 to i16
 104   store i16 %conv, i16* %incdec.ptr, align 2  ; write the widened character to buffer[32 - %0]
 105   %1 = icmp ugt i32 %i.addr.0, 9  ; loop again while the value before division is greater than 9
 106   %indvar.next44 = add i64 %0, 1
 107   br i1 %1, label %do.body, label %do.end
 108
 109 do.end:                                           ; preds = %do.body
 110   %xap.0 = inttoptr i64 %0 to i1*  ; out-of-loop use of the do.body counter; the expected output above uses the incremented LSR IV here
 111   %cap.0 = ptrtoint i1* %xap.0 to i64  ; result is not used elsewhere in this function
 112   %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast  ; both operands are casts of %add.ptr
 113   %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1  ; halve the byte difference
 114   %conv11 = trunc i64 %sub.ptr.div39 to i32  ; amount added to the mLength field in for.end
 115   %mLength = getelementptr inbounds %struct.Vector2, %struct.Vector2* %result, i64 0, i32 2  ; address of field 2 (the i32) of %result
 116   %idx.ext21 = bitcast i64 %sub.ptr.div39 to i64  ; same-type bitcast, value unchanged
 117   %incdec.ptr.sum = add i64 %idx.ext21, -1
 118   %cp.0.sum = sub i64 %incdec.ptr.sum, %0
 119   %add.ptr22 = getelementptr [33 x i16], [33 x i16]* %buffer, i64 1, i64 %cp.0.sum  ; end pointer that for.body's exit compare tests against
 120   %cmp2740 = icmp eq i64 %idx.ext21, 0  ; skip the copy loop entirely when the count is zero
 121   br i1 %cmp2740, label %for.end, label %for.body.lr.ph
 122
 123 for.body.lr.ph:                                   ; preds = %do.end
 124   %tmp16 = load i32, i32* %mLength, align 4  ; current value of field 2
 125   %mBegin = getelementptr inbounds %struct.Vector2, %struct.Vector2* %result, i64 0, i32 0  ; address of field 0 (the i16*) of %result
 126   %tmp14 = load i16*, i16** %mBegin, align 8  ; destination base pointer
 127   %tmp48 = zext i32 %tmp16 to i64  ; destination start offset, in i16 elements
 128   br label %for.body
 129
 130 for.body:                                         ; preds = %for.body, %for.body.lr.ph
 131   %indvar = phi i64 [ 0, %for.body.lr.ph ], [ %indvar.next, %for.body ]  ; copy-loop counter, starts at 0
 132   %tmp46 = add i64 %tmp51, %indvar  ; reuses %tmp51 (32 - %0) computed in do.body
 133   %p.042 = getelementptr [33 x i16], [33 x i16]* %buffer, i64 0, i64 %tmp46  ; source element in %buffer
 134   %tmp47 = sub i64 %indvar, %0
 135   %incdec.ptr32 = getelementptr [33 x i16], [33 x i16]* %buffer, i64 1, i64 %tmp47  ; pointer tested by the exit compare below
 136   %tmp49 = add i64 %tmp48, %indvar
 137   %dst.041 = getelementptr i16, i16* %tmp14, i64 %tmp49  ; destination element: %tmp14[%tmp48 + %indvar]
 138   %tmp29 = load i16, i16* %p.042, align 2
 139   store i16 %tmp29, i16* %dst.041, align 2  ; copy one i16 from %buffer to the destination
 140   %cmp27 = icmp eq i16* %incdec.ptr32, %add.ptr22  ; exit test; the expected output above compares an LSR IV against null instead
 141   %indvar.next = add i64 %indvar, 1
 142   br i1 %cmp27, label %for.end.loopexit, label %for.body
 143
 144 for.end.loopexit:                                 ; preds = %for.body
 145   br label %for.end
 146
 147 for.end:                                          ; preds = %for.end.loopexit, %do.end
 148   %tmp38 = load i32, i32* %mLength, align 4
 149   %add = add i32 %tmp38, %conv11  ; field 2 += %conv11
 150   store i32 %add, i32* %mLength, align 4
 151   ret void
 152 }