llvm/test/Transforms/LoopVectorize/trunc-reductions.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=8 -S < %s | FileCheck %s
   3
   4 define i8 @reduction_and_trunc(ptr noalias nocapture %ptr) {
   5 ; CHECK-LABEL: @reduction_and_trunc(
   6 ; CHECK-NEXT:  entry:
   7 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
   8 ; CHECK:       vector.ph:
   9 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
  10 ; CHECK:       vector.body:
  11 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
  12 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
  13 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
  14 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
  15 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
  16 ; CHECK-NEXT:    [[TMP3]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
  17 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
  18 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
  19 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
  20 ; CHECK:       middle.block:
  21 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP3]])
  22 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
  23 ; CHECK:       scalar.ph:
  24 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
  25 ; CHECK:       for.body:
  26 ; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
  27 ; CHECK:       for.end:
  28 ; CHECK-NEXT:    [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
  29 ; CHECK-NEXT:    ret i8 [[AND_LCSSA_OFF0]]
  30 ;
     ; AND-reduction over 256 consecutive i8 loads from %ptr. The running value
     ; is kept in an i32 phi (start value 0) and truncated to i8 on exit.
     ; The expected output above performs the whole reduction on <8 x i8>:
     ; lane 0 of the vector phi starts at 0, the other lanes at -1 (all ones),
     ; and the lanes are combined with llvm.vector.reduce.and.v8i8.
  31 entry:
  32   br label %for.body
  33
  34 for.body:
  35   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  36   %sum.02p = phi i32 [ %and, %for.body ], [ 0, %entry ]
       ; Keep only the low 8 bits of the running value before combining.
  37   %sum.02 = and i32 %sum.02p, 255
  38   %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
  39   %load = load i8, ptr %gep
       ; Widen the loaded byte to the i32 type of the accumulator.
  40   %ext = zext i8 %load to i32
  41   %and = and i32 %sum.02, %ext
  42   %iv.next = add i32 %iv, 1
  43   %exitcond = icmp eq i32 %iv.next, 256
  44   br i1 %exitcond, label %for.end, label %for.body
  45
  46 for.end:
       ; Only the low 8 bits of the reduction result are returned.
  47   %ret = trunc i32 %and to i8
  48   ret i8 %ret
  49 }
  50
  51 define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
  52 ; CHECK-LABEL: @reduction_or_trunc(
  53 ; CHECK-NEXT:  entry:
  54 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
  55 ; CHECK:       vector.ph:
  56 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
  57 ; CHECK:       vector.body:
  58 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
  59 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
  60 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
  61 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
  62 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
  63 ; CHECK-NEXT:    [[TMP3]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
  64 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
  65 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
  66 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
  67 ; CHECK:       middle.block:
  68 ; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]])
  69 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
  70 ; CHECK:       scalar.ph:
  71 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
  72 ; CHECK:       for.body:
  73 ; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
  74 ; CHECK:       for.end:
  75 ; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
  76 ; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
  77 ;
     ; OR-reduction over 256 consecutive i16 loads from %ptr. The running value
     ; is kept in an i32 phi (start value 0) and truncated to i16 on exit.
     ; The expected output above performs the reduction on <8 x i16> with a
     ; zeroinitializer start vector and llvm.vector.reduce.or.v8i16.
     ; NOTE(review): %xor below holds the result of an `or`; the name (and
     ; XOR_LCSSA_OFF0 in the expected output) looks like a leftover from the
     ; xor variant of this test - confirm before renaming.
  78 entry:
  79   br label %for.body
  80
  81 for.body:
  82   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  83   %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
       ; Keep only the low 16 bits of the running value before combining.
  84   %sum.02 = and i32 %sum.02p, 65535
  85   %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
  86   %load = load i16, ptr %gep
       ; Widen the loaded halfword to the i32 type of the accumulator.
  87   %ext = zext i16 %load to i32
  88   %xor = or i32 %sum.02, %ext
  89   %iv.next = add i32 %iv, 1
  90   %exitcond = icmp eq i32 %iv.next, 256
  91   br i1 %exitcond, label %for.end, label %for.body
  92
  93 for.end:
       ; Only the low 16 bits of the reduction result are returned.
  94   %ret = trunc i32 %xor to i16
  95   ret i16 %ret
  96 }
  97
  98 define i16 @reduction_xor_trunc(ptr noalias nocapture %ptr) {
  99 ; CHECK-LABEL: @reduction_xor_trunc(
 100 ; CHECK-NEXT:  entry:
 101 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 102 ; CHECK:       vector.ph:
 103 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 104 ; CHECK:       vector.body:
 105 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 106 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 107 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 108 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
 109 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
 110 ; CHECK-NEXT:    [[TMP3]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
 111 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 112 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 113 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 114 ; CHECK:       middle.block:
 115 ; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP3]])
 116 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 117 ; CHECK:       scalar.ph:
 118 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 119 ; CHECK:       for.body:
 120 ; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 121 ; CHECK:       for.end:
 122 ; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
 123 ; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
 124 ;
     ; XOR-reduction over 256 consecutive i16 loads from %ptr. The running value
     ; is kept in an i32 phi (start value 0) and truncated to i16 on exit.
     ; The expected output above performs the reduction on <8 x i16> with a
     ; zeroinitializer start vector and llvm.vector.reduce.xor.v8i16.
 125 entry:
 126   br label %for.body
 127
 128 for.body:
 129   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
 130   %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
       ; Keep only the low 16 bits of the running value before combining.
 131   %sum.02 = and i32 %sum.02p, 65535
 132   %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
 133   %load = load i16, ptr %gep
       ; Widen the loaded halfword to the i32 type of the accumulator.
 134   %ext = zext i16 %load to i32
 135   %xor = xor i32 %sum.02, %ext
 136   %iv.next = add i32 %iv, 1
 137   %exitcond = icmp eq i32 %iv.next, 256
 138   br i1 %exitcond, label %for.end, label %for.body
 139
 140 for.end:
       ; Only the low 16 bits of the reduction result are returned.
 141   %ret = trunc i32 %xor to i16
 142   ret i16 %ret
 143 }
 144
 145 define i8 @reduction_smin_trunc(ptr noalias nocapture %ptr) {
 146 ; CHECK-LABEL: @reduction_smin_trunc(
 147 ; CHECK-NOT: vector.body
 148 ; CHECK-NOT: <8 x
 149 ; CHECK: ret
     ; Signed-minimum pattern (icmp slt + select) over 256 sign-extended i8
     ; loads, with the running value masked to 8 bits on every iteration and
     ; truncated to i8 on exit. The assertions above only require that neither
     ; a vector.body block nor an <8 x ...> type shows up before the return,
     ; i.e. the loop is expected to stay scalar.
 150 entry:
 151   br label %for.body
 152
 153 for.body:
 154   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
 155   %sum.02p = phi i32 [ %min, %for.body ], [ 256, %entry ]
       ; The mask turns the start value 256 into 0 on the first iteration.
 156   %sum.02 = and i32 %sum.02p, 255
 157   %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
 158   %load = load i8, ptr %gep
 159   %ext = sext i8 %load to i32
       ; Select the smaller of the masked running value and the loaded value
       ; (signed comparison).
 160   %icmp = icmp slt i32 %sum.02, %ext
 161   %min = select i1 %icmp, i32 %sum.02, i32 %ext
 162   %iv.next = add i32 %iv, 1
 163   %exitcond = icmp eq i32 %iv.next, 256
 164   br i1 %exitcond, label %for.end, label %for.body
 165
 166 for.end:
 167   %ret = trunc i32 %min to i8
 168   ret i8 %ret
 169 }
 170
 171 define i8 @reduction_umin_trunc(ptr noalias nocapture %ptr) {
 172 ; CHECK-LABEL: @reduction_umin_trunc(
 173 ; CHECK-NOT: vector.body
 174 ; CHECK-NOT: <8 x
 175 ; CHECK: ret
     ; Unsigned-minimum pattern (icmp ult + select) over 256 zero-extended i8
     ; loads, with the running value (start value 0) masked to 8 bits on every
     ; iteration and truncated to i8 on exit. The assertions above only require
     ; that neither a vector.body block nor an <8 x ...> type shows up before
     ; the return, i.e. the loop is expected to stay scalar.
 176 entry:
 177   br label %for.body
 178
 179 for.body:
 180   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
 181   %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
       ; Keep only the low 8 bits of the running value before comparing.
 182   %sum.02 = and i32 %sum.02p, 255
 183   %gep = getelementptr inbounds i8, ptr %ptr, i32 %iv
 184   %load = load i8, ptr %gep
 185   %ext = zext i8 %load to i32
       ; Select the smaller of the masked running value and the loaded value
       ; (unsigned comparison).
 186   %icmp = icmp ult i32 %sum.02, %ext
 187   %min = select i1 %icmp, i32 %sum.02, i32 %ext
 188   %iv.next = add i32 %iv, 1
 189   %exitcond = icmp eq i32 %iv.next, 256
 190   br i1 %exitcond, label %for.end, label %for.body
 191
 192 for.end:
 193   %ret = trunc i32 %min to i8
 194   ret i8 %ret
 195 }
 196
 197 define i16 @reduction_smax_trunc(ptr noalias nocapture %ptr) {
 198 ; CHECK-LABEL: @reduction_smax_trunc(
 199 ; CHECK-NOT: vector.body
 200 ; CHECK-NOT: <8 x
 201 ; CHECK: ret
     ; Signed-maximum pattern (icmp sgt + select) over 256 sign-extended i16
     ; loads, with the running value (start value 0) masked to 16 bits on every
     ; iteration and truncated to i16 on exit. The assertions above only
     ; require that neither a vector.body block nor an <8 x ...> type shows up
     ; before the return, i.e. the loop is expected to stay scalar.
     ; NOTE(review): %min below actually holds the larger operand; the name
     ; looks copied from the min variants - confirm before renaming.
 202 entry:
 203   br label %for.body
 204
 205 for.body:
 206   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
 207   %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
       ; Keep only the low 16 bits of the running value before comparing.
 208   %sum.02 = and i32 %sum.02p, 65535
 209   %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
 210   %load = load i16, ptr %gep
 211   %ext = sext i16 %load to i32
       ; Select the larger of the masked running value and the loaded value
       ; (signed comparison).
 212   %icmp = icmp sgt i32 %sum.02, %ext
 213   %min = select i1 %icmp, i32 %sum.02, i32 %ext
 214   %iv.next = add i32 %iv, 1
 215   %exitcond = icmp eq i32 %iv.next, 256
 216   br i1 %exitcond, label %for.end, label %for.body
 217
 218 for.end:
 219   %ret = trunc i32 %min to i16
 220   ret i16 %ret
 221 }
 222
 223 define i16 @reduction_umax_trunc(ptr noalias nocapture %ptr) {
 224 ; CHECK-LABEL: @reduction_umax_trunc(
 225 ; CHECK-NOT: vector.body
 226 ; CHECK-NOT: <8 x
 227 ; CHECK: ret
     ; Unsigned-maximum pattern (icmp ugt + select) over 256 zero-extended i16
     ; loads, with the running value (start value 0) masked to 16 bits on every
     ; iteration and truncated to i16 on exit. The assertions above only
     ; require that neither a vector.body block nor an <8 x ...> type shows up
     ; before the return, i.e. the loop is expected to stay scalar.
     ; NOTE(review): %min below actually holds the larger operand; the name
     ; looks copied from the min variants - confirm before renaming.
 228 entry:
 229   br label %for.body
 230
 231 for.body:
 232   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
 233   %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
       ; Keep only the low 16 bits of the running value before comparing.
 234   %sum.02 = and i32 %sum.02p, 65535
 235   %gep = getelementptr inbounds i16, ptr %ptr, i32 %iv
 236   %load = load i16, ptr %gep
 237   %ext = zext i16 %load to i32
       ; Select the larger of the masked running value and the loaded value
       ; (unsigned comparison).
 238   %icmp = icmp ugt i32 %sum.02, %ext
 239   %min = select i1 %icmp, i32 %sum.02, i32 %ext
 240   %iv.next = add i32 %iv, 1
 241   %exitcond = icmp eq i32 %iv.next, 256
 242   br i1 %exitcond, label %for.end, label %for.body
 243
 244 for.end:
 245   %ret = trunc i32 %min to i16
 246   ret i16 %ret
 247 }
 248
 249 ; Test case for https://github.com/llvm/llvm-project/issues/81415.
 250 define i32 @reduction_and_or(i16 %a, i32 %b, ptr %src) {
 251 ; CHECK-LABEL: @reduction_and_or(
 252 ; CHECK-NEXT:  entry:
 253 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 254 ; CHECK:       vector.ph:
 255 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 256 ; CHECK:       vector.body:
 257 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 258 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 259 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 260 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
 261 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
 262 ; CHECK-NEXT:    [[TMP2]] = or <8 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
 263 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 264 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
 265 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 266 ; CHECK:       middle.block:
 267 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP2]])
 268 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
 269 ; CHECK:       scalar.ph:
 270 ; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ]
 271 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 272 ; CHECK:       loop:
 273 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 992, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
 274 ; CHECK-NEXT:    [[OR67:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
 275 ; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg i32 [[IV]] to i64
 276 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP5]]
 277 ; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
 278 ; CHECK-NEXT:    [[OR]] = or i32 [[OR67]], [[L]]
 279 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 280 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 999
 281 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
 282 ; CHECK:       exit:
 283 ; CHECK-NEXT:    [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ poison, [[MIDDLE_BLOCK]] ]
 284 ; CHECK-NEXT:    ret i32 [[OR_LCSSA]]
 285 ;
     ; OR-reduction over i32 loads from %src with start value 10, running until
     ; %iv.next reaches 999. The expected output above keeps the reduction at
     ; <8 x i32> (lane 0 of the vector phi starts at 10, the other lanes at 0),
     ; runs the vector loop up to index 992 and leaves the remaining
     ; iterations to the scalar loop.
 286 entry:
 287   %ext1 = zext i16 %a to i32
 288   br label %loop
 289
 290 loop:
 291   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 292   %or67 = phi i32 [ 10, %entry ], [ %or, %loop ]
       ; %t, %ext, %cmp and %ext2 depend only on the arguments %a and %b,
       ; not on %iv.
 293   %t = trunc i32 %b to i16
 294   %ext = sext i16 %t to i32
 295   %cmp = icmp sgt i32 %ext, %ext1
 296   %ext2 = zext i1 %cmp to i32
 297   %cmp3 = icmp sge i32 %iv, %ext2
 298   %ext4 = zext i1 %cmp3 to i32
 299   %div = sdiv i32 %ext4, %b
       ; Masking with 0 always yields 0, so %add below equals the loaded
       ; value; the expected output ORs the load into the accumulator
       ; directly.
 300   %and = and i32 %div, 0
 301   %gep = getelementptr inbounds i32, ptr %src, i32 %iv
 302   %l = load i32, ptr %gep
 303   %add = add i32 %and, %l
 304   %or = or i32 %or67, %add
 305   %iv.next = add nsw i32 %iv, 1
 306   %tobool.not = icmp eq i32 %iv.next, 999
 307   br i1 %tobool.not, label %exit, label %loop
 308
 309 exit:
 310   %or.lcssa = phi i32 [ %or, %loop ]
 311   ret i32 %or.lcssa
 312 }