llvm/test/Transforms/InstCombine/reduction-shufflevector.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
   3
   4 define i32 @reduce_add(<4 x i32> %x) { ; add-reduction of a lane-reversed copy of %x; expected output reduces %x directly
   5 ; CHECK-LABEL: @reduce_add(
   6 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
   7 ; CHECK-NEXT:    ret i32 [[RES]]
   8 ;
   9   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; reversed lanes: a real permutation, not an identity mask that would vanish on its own
  10   %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
  11   ret i32 %res
  12 }
  13
  14 define i32 @reduce_or(<4 x i32> %x) { ; or-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  15 ; CHECK-LABEL: @reduce_or(
  16 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
  17 ; CHECK-NEXT:    ret i32 [[RES]]
  18 ;
  19   %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4> ; indices 4-7 address the second operand (%x), here in reverse lane order
  20   %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
  21   ret i32 %res
  22 }
  23
  24 define i32 @reduce_and(<4 x i32> %x) { ; and-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  25 ; CHECK-LABEL: @reduce_and(
  26 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
  27 ; CHECK-NEXT:    ret i32 [[RES]]
  28 ;
  29   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; swaps the two middle lanes of %x; every lane is used exactly once
  30   %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
  31   ret i32 %res
  32 }
  33
  34 define i32 @reduce_xor(<4 x i32> %x) { ; xor-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  35 ; CHECK-LABEL: @reduce_xor(
  36 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
  37 ; CHECK-NEXT:    ret i32 [[RES]]
  38 ;
  39   %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4> ; indices 4-7 address the second operand (%x): its lanes 1, 2, 3, 0
  40   %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
  41   ret i32 %res
  42 }
  43
  44 define i32 @reduce_umax(<4 x i32> %x) { ; umax-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  45 ; CHECK-LABEL: @reduce_umax(
  46 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
  47 ; CHECK-NEXT:    ret i32 [[RES]]
  48 ;
  49   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0> ; each lane of %x appears exactly once
  50   %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
  51   ret i32 %res
  52 }
  53
  54 define i32 @reduce_umin(<4 x i32> %x) { ; umin-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  55 ; CHECK-LABEL: @reduce_umin(
  56 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
  57 ; CHECK-NEXT:    ret i32 [[RES]]
  58 ;
  59   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1> ; swaps the low and high halves of %x
  60   %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
  61   ret i32 %res
  62 }
  63
  64 define i32 @reduce_smax(<4 x i32> %x) { ; smax-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  65 ; CHECK-LABEL: @reduce_smax(
  66 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
  67 ; CHECK-NEXT:    ret i32 [[RES]]
  68 ;
  69   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1> ; each lane of %x appears exactly once
  70   %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
  71   ret i32 %res
  72 }
  73
  74 define i32 @reduce_smin(<4 x i32> %x) { ; smin-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  75 ; CHECK-LABEL: @reduce_smin(
  76 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
  77 ; CHECK-NEXT:    ret i32 [[RES]]
  78 ;
  79   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2> ; each lane of %x appears exactly once
  80   %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
  81   ret i32 %res
  82 }
  83
  84 define float @reduce_fmax(<4 x float> %x) { ; fmax-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  85 ; CHECK-LABEL: @reduce_fmax(
  86 ; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
  87 ; CHECK-NEXT:    ret float [[RES]]
  88 ;
  89   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1> ; each lane of %x appears exactly once
  90   %res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf) ; both fast-math flags are expected to survive on the rewritten call
  91   ret float %res
  92 }
  93
  94 define float @reduce_fmin(<4 x float> %x) { ; fmin-reduction of a shuffled %x; expected output reduces %x directly, with no shuffle
  95 ; CHECK-LABEL: @reduce_fmin(
  96 ; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
  97 ; CHECK-NEXT:    ret float [[RES]]
  98 ;
  99   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2> ; each lane of %x appears exactly once
 100   %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf) ; no fast-math flags on this call
 101   ret float %res
 102 }
 103
 104 define float @reduce_fadd(float %a, <4 x float> %x) { ; reassoc fadd-reduction (start value %a) of a shuffled %x; expected output has no shuffle
 105 ; CHECK-LABEL: @reduce_fadd(
 106 ; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
 107 ; CHECK-NEXT:    ret float [[RES]]
 108 ;
 109   %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2> ; both operands are %x; all indices are below 4, so only the first is read
 110   %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf) ; reassoc is present on the call and kept in the expected output
 111   ret float %res
 112 }
 113
 114 define float @reduce_fmul(float %a, <4 x float> %x) { ; reassoc fmul-reduction (start value %a) of a shuffled %x; expected output has no shuffle
 115 ; CHECK-LABEL: @reduce_fmul(
 116 ; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
 117 ; CHECK-NEXT:    ret float [[RES]]
 118 ;
 119   %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2> ; all indices are below 4, so the zeroinitializer operand is never read
 120   %res = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf) ; reassoc is present on the call and kept in the expected output
 121   ret float %res
 122 }
 123
 124 ; Negative cases: the expected output below still contains the shufflevector.
 125 ; TODO: simplify the reductions for shuffles resulting in undef/poison elements.
 126
 127 define i32 @reduce_add_failed(<4 x i32> %x) { ; negative test: the expected output keeps the shuffle in front of the add-reduction
 128 ; CHECK-LABEL: @reduce_add_failed(
 129 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
 130 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[SHUF]])
 131 ; CHECK-NEXT:    ret i32 [[RES]]
 132 ;
 133   %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4> ; index 4 is lane 0 of the second %x: lane 0 is used twice and lane 3 never
 134   %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
 135   ret i32 %res
 136 }
 137
 138 define i32 @reduce_or_failed(<4 x i32> %x) { ; negative test: the expected output keeps the shuffle in front of the or-reduction
 139 ; CHECK-LABEL: @reduce_or_failed(
 140 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
 141 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
 142 ; CHECK-NEXT:    ret i32 [[RES]]
 143 ;
 144   %shuf = shufflevector <4 x i32> %x, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 4> ; index 4 reads lane 0 of the zero vector, so lane 0 of %x is replaced by a constant
 145   %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
 146   ret i32 %res
 147 }
 148
 149 define i32 @reduce_and_failed(<4 x i32> %x) { ; negative test: the expected output keeps the shuffle in front of the and-reduction
 150 ; CHECK-LABEL: @reduce_and_failed(
 151 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
 152 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
 153 ; CHECK-NEXT:    ret i32 [[RES]]
 154 ;
 155   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0> ; lane 0 of %x is used twice and lane 3 not at all
 156   %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
 157   ret i32 %res
 158 }
 159
 160 define i32 @reduce_xor_failed(<4 x i32> %x) { ; negative test: the expected output keeps the shuffle in front of the xor-reduction
 161 ; CHECK-LABEL: @reduce_xor_failed(
 162 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
 163 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
 164 ; CHECK-NEXT:    ret i32 [[RES]]
 165 ;
 166   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef> ; last mask element is undef and lane 0 of %x is never selected
 167   %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
 168   ret i32 %res
 169 }
 170
 171 define i32 @reduce_umax_failed(<2 x i32> %x, <2 x i32> %y) { ; negative test: the expected output keeps the shuffle in front of the umax-reduction
 172 ; CHECK-LABEL: @reduce_umax_failed(
 173 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <4 x i32> <i32 2, i32 1, i32 3, i32 0>
 174 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
 175 ; CHECK-NEXT:    ret i32 [[RES]]
 176 ;
 177   %shuf = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 1, i32 3, i32 0> ; builds a 4-lane vector out of two different 2-lane sources
 178   %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
 179   ret i32 %res
 180 }
 181
 182 define i32 @reduce_umin_failed(<2 x i32> %x) { ; negative test: the expected output keeps the shuffle in front of the umin-reduction
 183 ; CHECK-LABEL: @reduce_umin_failed(
 184 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
 185 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
 186 ; CHECK-NEXT:    ret i32 [[RES]]
 187 ;
 188   %shuf = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1> ; indices 2 and 3 address the poison operand; only the last two result lanes come from %x
 189   %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
 190   ret i32 %res
 191 }
 192
 193 define i32 @reduce_smax_failed(<8 x i32> %x) { ; negative test: the expected output keeps the shuffle in front of the smax-reduction
 194 ; CHECK-LABEL: @reduce_smax_failed(
 195 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
 196 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
 197 ; CHECK-NEXT:    ret i32 [[RES]]
 198 ;
 199   %shuf = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1> ; selects only 4 of the 8 lanes of %x, so source and result lengths differ
 200   %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
 201   ret i32 %res
 202 }
 203
 204 define i32 @reduce_smin_failed(<8 x i32> %x) { ; negative test: the expected output keeps the shuffle in front of the smin-reduction
 205 ; CHECK-LABEL: @reduce_smin_failed(
 206 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
 207 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
 208 ; CHECK-NEXT:    ret i32 [[RES]]
 209 ;
 210   %shuf = shufflevector <8 x i32> %x, <8 x i32> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2> ; both operands are %x, but only lanes 0-3 of its 8 lanes are selected
 211   %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
 212   ret i32 %res
 213 }
 214
 215 define float @reduce_fmax_failed(<4 x float> %x) { ; negative test: the expected output keeps the shuffle in front of the fmax-reduction
 216 ; CHECK-LABEL: @reduce_fmax_failed(
 217 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
 218 ; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
 219 ; CHECK-NEXT:    ret float [[RES]]
 220 ;
 221   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1> ; lane 2 of %x is used twice and lane 0 not at all
 222   %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
 223   ret float %res
 224 }
 225
 226 define float @reduce_fmin_failed(<4 x float> %x) { ; negative test: the expected output keeps the shuffle in front of the fmin-reduction
 227 ; CHECK-LABEL: @reduce_fmin_failed(
 228 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 3, i32 1, i32 2>
 229 ; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
 230 ; CHECK-NEXT:    ret float [[RES]]
 231 ;
 232   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 poison, i32 3, i32 1, i32 2> ; first mask element is poison (shown as undef in the expected output); lane 0 of %x is never selected
 233   %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf)
 234   ret float %res
 235 }
 236
 237 define float @reduce_fadd_failed(float %a, <4 x float> %x) { ; negative test: the expected output keeps the shuffle in front of the fadd-reduction
 238 ; CHECK-LABEL: @reduce_fadd_failed(
 239 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
 240 ; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
 241 ; CHECK-NEXT:    ret float [[RES]]
 242 ;
 243   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2> ; each lane of %x appears exactly once, same mask as in @reduce_fadd
 244   %res = call float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf) ; unlike @reduce_fadd, this call carries no reassoc flag
 245   ret float %res
 246 }
 247
 248 define float @reduce_fmul_failed(float %a, <2 x float> %x) { ; negative test: the expected output keeps the shuffle in front of the fmul-reduction
 249 ; CHECK-LABEL: @reduce_fmul_failed(
 250 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
 251 ; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
 252 ; CHECK-NEXT:    ret float [[RES]]
 253 ;
 254   %shuf = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2> ; %x has only 2 lanes, so indices 3 and 2 address the poison operand
 255   %res = call float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf) ; this call carries no reassoc flag
 256   ret float %res
 257 }
 258
 259 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) ; intrinsic declarations for the calls in this file; the integer reductions take <4 x i32>
 260 declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
 261 declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
 262 declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
 263 declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
 264 declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
 265 declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
 266 declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
 267 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) ; the floating-point reductions take <4 x float>
 268 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
 269 declare float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %b) ; fadd and fmul take a scalar float first, then the vector
 270 declare float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %b)