llvm/test/CodeGen/Generic/expand-experimental-reductions.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -expand-reductions -S | FileCheck %s
   3 ; Tests without a target which should expand all reductions
     ; Declarations of the vector-reduction intrinsics called by the tests below.
     ; Integer arithmetic and bitwise reductions over <2 x i64>:
   4 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
   5 declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
   6 declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
   7 declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
   8 declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
   9
     ; Floating-point add/mul reductions over <4 x float>. The leading float
     ; operand is a scalar that the tests pass either as a constant, as undef,
     ; or as an %accum argument.
  10 declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
  11 declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
  12
     ; Signed and unsigned integer min/max reductions over <2 x i64>:
  13 declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
  14 declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
  15 declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
  16 declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
  17
     ; Floating-point min/max reductions over <2 x double>:
  18 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
  19 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
  20
     ; Bitwise 'and' reduction over a 3-element vector, called by @test_v3i8.
     ; NOTE(review): the expected output of @test_v3i8 names this intrinsic
     ; without the ".i8" component -- presumably this spelling is a legacy
     ; mangling that gets renamed when the module is parsed; confirm.
  21 declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)
  22
  23 define i64 @add_i64(<2 x i64> %vec) {
     ; Integer add reduction of a <2 x i64> vector.
     ; The assertions below expect the intrinsic call to be replaced by a
     ; shufflevector that moves lane 1 into lane 0, a vector add of the input
     ; with the shuffled value, and an extractelement of lane 0.
  24 ; CHECK-LABEL: @add_i64(
  25 ; CHECK-NEXT:  entry:
  26 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
  27 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
  28 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
  29 ; CHECK-NEXT:    ret i64 [[TMP0]]
  30 ;
  31 entry:
  32   %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec)
  33   ret i64 %r
  34 }
  35
  36 define i64 @mul_i64(<2 x i64> %vec) {
     ; Integer multiply reduction of a <2 x i64> vector.
     ; The assertions below expect the intrinsic call to be replaced by a
     ; shufflevector that moves lane 1 into lane 0, a vector mul of the input
     ; with the shuffled value, and an extractelement of lane 0.
  37 ; CHECK-LABEL: @mul_i64(
  38 ; CHECK-NEXT:  entry:
  39 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
  40 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
  41 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
  42 ; CHECK-NEXT:    ret i64 [[TMP0]]
  43 ;
  44 entry:
  45   %r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec)
  46   ret i64 %r
  47 }
  48
  49 define i64 @and_i64(<2 x i64> %vec) {
     ; Bitwise 'and' reduction of a <2 x i64> vector.
     ; The assertions below expect the intrinsic call to be replaced by a
     ; shufflevector that moves lane 1 into lane 0, a vector 'and' of the input
     ; with the shuffled value, and an extractelement of lane 0.
  50 ; CHECK-LABEL: @and_i64(
  51 ; CHECK-NEXT:  entry:
  52 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
  53 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
  54 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
  55 ; CHECK-NEXT:    ret i64 [[TMP0]]
  56 ;
  57 entry:
  58   %r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec)
  59   ret i64 %r
  60 }
  61
  62 define i64 @or_i64(<2 x i64> %vec) {
     ; Bitwise 'or' reduction of a <2 x i64> vector.
     ; The assertions below expect the intrinsic call to be replaced by a
     ; shufflevector that moves lane 1 into lane 0, a vector 'or' of the input
     ; with the shuffled value, and an extractelement of lane 0.
  63 ; CHECK-LABEL: @or_i64(
  64 ; CHECK-NEXT:  entry:
  65 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
  66 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
  67 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
  68 ; CHECK-NEXT:    ret i64 [[TMP0]]
  69 ;
  70 entry:
  71   %r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec)
  72   ret i64 %r
  73 }
  74
  75 define i64 @xor_i64(<2 x i64> %vec) {
     ; Bitwise 'xor' reduction of a <2 x i64> vector.
     ; The assertions below expect the intrinsic call to be replaced by a
     ; shufflevector that moves lane 1 into lane 0, a vector 'xor' of the input
     ; with the shuffled value, and an extractelement of lane 0.
  76 ; CHECK-LABEL: @xor_i64(
  77 ; CHECK-NEXT:  entry:
  78 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
  79 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
  80 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
  81 ; CHECK-NEXT:    ret i64 [[TMP0]]
  82 ;
  83 entry:
  84   %r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec)
  85   ret i64 %r
  86 }
  87
  88 define float @fadd_f32(<4 x float> %vec) {
     ; fadd reduction of <4 x float>, called with the 'fast' flag and a
     ; constant 0.0 as the scalar operand.
     ; Expected expansion: two shuffle + 'fadd fast' steps (lanes 2,3 combined
     ; with lanes 0,1, then lane 1 combined with lane 0), an extractelement of
     ; lane 0, and a final scalar 'fadd fast' of 0.0 with the extracted lane.
  89 ; CHECK-LABEL: @fadd_f32(
  90 ; CHECK-NEXT:  entry:
  91 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  92 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
  93 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  94 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
  95 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
  96 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
  97 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
  98 ;
  99 entry:
 100   %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
 101   ret float %r
 102 }
 103
 104 define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
     ; fadd reduction of <4 x float>, called with the 'fast' flag and the
     ; %accum argument as the scalar operand.
     ; Expected expansion: two shuffle + 'fadd fast' steps (lanes 2,3 combined
     ; with lanes 0,1, then lane 1 combined with lane 0), an extractelement of
     ; lane 0, and a final scalar 'fadd fast' of %accum with the extracted lane.
 105 ; CHECK-LABEL: @fadd_f32_accum(
 106 ; CHECK-NEXT:  entry:
 107 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 108 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
 109 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 110 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 111 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
 112 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]]
 113 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 114 ;
 115 entry:
 116   %r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
 117   ret float %r
 118 }
 119
 120 define float @fadd_f32_strict(<4 x float> %vec) {
     ; fadd reduction of <4 x float>, called without fast-math flags and with
     ; undef as the scalar operand.
     ; Expected expansion: an in-order scalar chain that extracts lanes 0, 1, 2
     ; and 3 one at a time and fadds each onto the running value, which starts
     ; as undef. No shuffles are expected.
 121 ; CHECK-LABEL: @fadd_f32_strict(
 122 ; CHECK-NEXT:  entry:
 123 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
 124 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
 125 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
 126 ; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
 127 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
 128 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
 129 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
 130 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
 131 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 132 ;
 133 entry:
 134   %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
 135   ret float %r
 136 }
 137
 138 define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
     ; fadd reduction of <4 x float>, called without fast-math flags and with
     ; the %accum argument as the scalar operand.
     ; Expected expansion: an in-order scalar chain that extracts lanes 0, 1, 2
     ; and 3 one at a time and fadds each onto the running value, which starts
     ; as %accum. No shuffles are expected.
 139 ; CHECK-LABEL: @fadd_f32_strict_accum(
 140 ; CHECK-NEXT:  entry:
 141 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
 142 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
 143 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
 144 ; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
 145 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
 146 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
 147 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
 148 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
 149 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 150 ;
 151 entry:
 152   %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
 153   ret float %r
 154 }
 155
 156 define float @fmul_f32(<4 x float> %vec) {
     ; fmul reduction of <4 x float>, called with the 'fast' flag and a
     ; constant 1.0 as the scalar operand.
     ; Expected expansion: two shuffle + 'fmul fast' steps (lanes 2,3 combined
     ; with lanes 0,1, then lane 1 combined with lane 0), an extractelement of
     ; lane 0, and a final scalar 'fmul fast' of 1.0 with the extracted lane.
 157 ; CHECK-LABEL: @fmul_f32(
 158 ; CHECK-NEXT:  entry:
 159 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 160 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
 161 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 162 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 163 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
 164 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
 165 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 166 ;
 167 entry:
 168   %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
 169   ret float %r
 170 }
 171
 172 define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
     ; fmul reduction of <4 x float>, called with the 'fast' flag and the
     ; %accum argument as the scalar operand.
     ; Expected expansion: two shuffle + 'fmul fast' steps (lanes 2,3 combined
     ; with lanes 0,1, then lane 1 combined with lane 0), an extractelement of
     ; lane 0, and a final scalar 'fmul fast' of %accum with the extracted lane.
 173 ; CHECK-LABEL: @fmul_f32_accum(
 174 ; CHECK-NEXT:  entry:
 175 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 176 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
 177 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 178 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 179 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
 180 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]]
 181 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 182 ;
 183 entry:
 184   %r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
 185   ret float %r
 186 }
 187
 188 define float @fmul_f32_strict(<4 x float> %vec) {
     ; fmul reduction of <4 x float>, called without fast-math flags and with
     ; undef as the scalar operand.
     ; Expected expansion: an in-order scalar chain that extracts lanes 0, 1, 2
     ; and 3 one at a time and fmuls each into the running value, which starts
     ; as undef. No shuffles are expected.
 189 ; CHECK-LABEL: @fmul_f32_strict(
 190 ; CHECK-NEXT:  entry:
 191 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
 192 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
 193 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
 194 ; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
 195 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
 196 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
 197 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
 198 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
 199 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 200 ;
 201 entry:
 202   %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
 203   ret float %r
 204 }
 205
 206 define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
     ; fmul reduction of <4 x float>, called without fast-math flags and with
     ; the %accum argument as the scalar operand.
     ; Expected expansion: an in-order scalar chain that extracts lanes 0, 1, 2
     ; and 3 one at a time and fmuls each into the running value, which starts
     ; as %accum. No shuffles are expected.
 207 ; CHECK-LABEL: @fmul_f32_strict_accum(
 208 ; CHECK-NEXT:  entry:
 209 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
 210 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
 211 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
 212 ; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
 213 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
 214 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
 215 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
 216 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
 217 ; CHECK-NEXT:    ret float [[BIN_RDX3]]
 218 ;
 219 entry:
 220   %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
 221   ret float %r
 222 }
 223
 224 define i64 @smax_i64(<2 x i64> %vec) {
     ; Signed-maximum reduction of a <2 x i64> vector.
     ; Expected expansion: a shufflevector that moves lane 1 into lane 0, an
     ; 'icmp sgt' of the input against the shuffled value, a select that keeps
     ; the input lane where the compare is true and the shuffled lane otherwise,
     ; and an extractelement of lane 0.
 225 ; CHECK-LABEL: @smax_i64(
 226 ; CHECK-NEXT:  entry:
 227 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 228 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
 229 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 230 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
 231 ; CHECK-NEXT:    ret i64 [[TMP0]]
 232 ;
 233 entry:
 234   %r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
 235   ret i64 %r
 236 }
 237
 238 define i64 @smin_i64(<2 x i64> %vec) {
     ; Signed-minimum reduction of a <2 x i64> vector.
     ; Expected expansion: a shufflevector that moves lane 1 into lane 0, an
     ; 'icmp slt' of the input against the shuffled value, a select that keeps
     ; the input lane where the compare is true and the shuffled lane otherwise,
     ; and an extractelement of lane 0.
 239 ; CHECK-LABEL: @smin_i64(
 240 ; CHECK-NEXT:  entry:
 241 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 242 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
 243 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 244 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
 245 ; CHECK-NEXT:    ret i64 [[TMP0]]
 246 ;
 247 entry:
 248   %r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
 249   ret i64 %r
 250 }
 251
 252 define i64 @umax_i64(<2 x i64> %vec) {
     ; Unsigned-maximum reduction of a <2 x i64> vector.
     ; Expected expansion: a shufflevector that moves lane 1 into lane 0, an
     ; 'icmp ugt' of the input against the shuffled value, a select that keeps
     ; the input lane where the compare is true and the shuffled lane otherwise,
     ; and an extractelement of lane 0.
 253 ; CHECK-LABEL: @umax_i64(
 254 ; CHECK-NEXT:  entry:
 255 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 256 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
 257 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 258 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
 259 ; CHECK-NEXT:    ret i64 [[TMP0]]
 260 ;
 261 entry:
 262   %r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
 263   ret i64 %r
 264 }
 265
 266 define i64 @umin_i64(<2 x i64> %vec) {
     ; Unsigned-minimum reduction of a <2 x i64> vector.
     ; Expected expansion: a shufflevector that moves lane 1 into lane 0, an
     ; 'icmp ult' of the input against the shuffled value, a select that keeps
     ; the input lane where the compare is true and the shuffled lane otherwise,
     ; and an extractelement of lane 0.
 267 ; CHECK-LABEL: @umin_i64(
 268 ; CHECK-NEXT:  entry:
 269 ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 270 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
 271 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 272 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
 273 ; CHECK-NEXT:    ret i64 [[TMP0]]
 274 ;
 275 entry:
 276   %r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
 277   ret i64 %r
 278 }
 279
 280 ; FIXME: Expand using maxnum intrinsic?
 281
 282 define double @fmax_f64(<2 x double> %vec) {
     ; fmax reduction of a <2 x double> vector; the call carries no fast-math
     ; flags. The assertions below expect the intrinsic call to be left in
     ; place, i.e. no expansion happens for this input.
     ; NOTE(review): presumably the pass only expands fmax/fmin calls that are
     ; marked 'fast' -- confirm against the ExpandReductions implementation.
 283 ; CHECK-LABEL: @fmax_f64(
 284 ; CHECK-NEXT:  entry:
 285 ; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
 286 ; CHECK-NEXT:    ret double [[R]]
 287 ;
 288 entry:
 289   %r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec)
 290   ret double %r
 291 }
 292
 293 ; FIXME: Expand using minnum intrinsic?
 294
 295 define double @fmin_f64(<2 x double> %vec) {
     ; fmin reduction of a <2 x double> vector; the call carries no fast-math
     ; flags. The assertions below expect the intrinsic call to be left in
     ; place, i.e. no expansion happens for this input.
     ; NOTE(review): presumably the pass only expands fmax/fmin calls that are
     ; marked 'fast' -- confirm against the ExpandReductions implementation.
 296 ; CHECK-LABEL: @fmin_f64(
 297 ; CHECK-NEXT:  entry:
 298 ; CHECK-NEXT:    [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
 299 ; CHECK-NEXT:    ret double [[R]]
 300 ;
 301 entry:
 302   %r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec)
 303   ret double %r
 304 }
 305
 306 ; FIXME: Why is this not expanded?
 307
 308 ; Test when the vector size is not power of two.
 309 define i8 @test_v3i8(<3 x i8> %a) nounwind {
     ; Bitwise 'and' reduction of a 3-element <3 x i8> vector (the element
     ; count is not a power of two). The assertions below expect the intrinsic
     ; call to be left in place, i.e. no expansion happens for this input.
     ; The expected output names the callee @llvm.vector.reduce.and.v3i8, while
     ; the input call below spells it @llvm.vector.reduce.and.i8.v3i8.
     ; NOTE(review): presumably the pass skips vectors whose element count is
     ; not a power of two, and the intrinsic name is rewritten when the module
     ; is parsed -- confirm both against the LLVM sources.
 310 ; CHECK-LABEL: @test_v3i8(
 311 ; CHECK-NEXT:  entry:
 312 ; CHECK-NEXT:    [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
 313 ; CHECK-NEXT:    ret i8 [[B]]
 314 ;
 315 entry:
 316   %b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a)
 317   ret i8 %b
 318 }