llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
   3 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-F16
   4
   5 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
     ; Layout string fields: little-endian (e), ELF name mangling (m:e), explicit
     ; ABI/preferred alignments for i8, i16, i64 and i128, native integer widths
     ; of 32 and 64 bits (n32:64), and a 128-bit natural stack alignment (S128).
   6
   7 define void @reduce_umin() {
     ; Unsigned-minimum reductions: one llvm.vector.reduce.umin call per vector
     ; type, covering i8 (1, 3, 4, 8, 16, 32, 64 lanes), i16, i32 and i64.
     ; Every operand is undef and no result is used; the test only matches the
     ; cost line printed for each call. The expectations below are shared by
     ; both RUN configurations.
   8 ; CHECK-LABEL: 'reduce_umin'
   9 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
  10 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
  11 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
  12 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
  13 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
  14 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
  15 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
  16 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
  17 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
  18 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
  19 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
  20 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
  21 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  22 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
  23 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  24 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  25 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  26 ;
  27   %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
  28   %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
  29   %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
  30   %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
  31   %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
  32   %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
  33   %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
  34   %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
  35   %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
  36   %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
  37   %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
  38   %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
  39   %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  40   %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
  41   %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  42   %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  43   ret void
  44 }
  45
  46 define void @reduce_umax() {
     ; Unsigned-maximum reductions: one llvm.vector.reduce.umax call per vector
     ; type, using the same set of i8/i16/i32/i64 vector shapes as @reduce_umin.
     ; Operands are undef and results are unused; only the printed cost of each
     ; call is matched. Both RUN configurations share these expectations.
  47 ; CHECK-LABEL: 'reduce_umax'
  48 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
  49 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
  50 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
  51 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
  52 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
  53 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
  54 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
  55 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
  56 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
  57 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
  58 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
  59 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
  60 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  61 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
  62 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  63 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  64 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  65 ;
  66   %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
  67   %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
  68   %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
  69   %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
  70   %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
  71   %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
  72   %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
  73   %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
  74   %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
  75   %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
  76   %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
  77   %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
  78   %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  79   %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
  80   %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  81   %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  82   ret void
  83 }
  84
  85 define void @reduce_smin() {
     ; Signed-minimum reductions: one llvm.vector.reduce.smin call per vector
     ; type, covering i8 (1, 3, 4, 8, 16, 32, 64 lanes), i16, i32 and i64.
     ; Operands are undef and results are unused; only the printed cost of each
     ; call is matched. Both RUN configurations share these expectations.
  86 ; CHECK-LABEL: 'reduce_smin'
  87 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
  88 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
  89 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
  90 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
  91 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
  92 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
  93 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
  94 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
  95 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
  96 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
  97 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
  98 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
  99 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
 100 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
 101 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
 102 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
 103 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 104 ;
 105   %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
 106   %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
 107   %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
 108   %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
 109   %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
 110   %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 111   %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 112   %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
 113   %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
 114   %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
 115   %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
 116   %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
 117   %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
 118   %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
 119   %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
 120   %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
 121   ret void
 122 }
 123
 124 define void @reduce_smax() {
     ; Signed-maximum reductions: one llvm.vector.reduce.smax call per vector
     ; type, covering i8 (1, 3, 4, 8, 16, 32, 64 lanes), i16, i32 and i64.
     ; Operands are undef and results are unused; only the printed cost of each
     ; call is matched. Both RUN configurations share these expectations.
 125 ; CHECK-LABEL: 'reduce_smax'
 126 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
 127 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
 128 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
 129 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
 130 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
 131 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 132 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 133 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
 134 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
 135 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
 136 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
 137 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
 138 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
 139 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
 140 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
 141 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
 142 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 143 ;
 144   %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
 145   %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
 146   %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
 147   %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
 148   %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
 149   %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 150   %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 151   %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
 152   %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
 153   %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
 154   %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
 155   %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
 156   %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
 157   %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
 158   %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
 159   %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
 160   ret void
 161 }
 162
 163 define void @reduce_fmin16() {
     ; Half-precision minimum reductions over 2, 4, 8 and 16 lanes, for both
     ; llvm.vector.reduce.fmin and llvm.vector.reduce.fminimum (the "m"-suffixed
     ; values). Unlike the integer tests, the expectations are split per RUN
     ; configuration: 11/50/174/240 for the run without -mattr=+fullfp16 and
     ; 2/2/2/3 for the run with it, identically for both intrinsics.
 164 ; CHECK-NOF16-LABEL: 'reduce_fmin16'
 165 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
 166 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
 167 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
 168 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
 169 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
 170 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
 171 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
 172 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
 173 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 174 ;
 175 ; CHECK-F16-LABEL: 'reduce_fmin16'
 176 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
 177 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
 178 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
 179 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
 180 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
 181 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
 182 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
 183 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
 184 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 185 ;
 186   %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
 187   %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
 188   %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
 189   %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
 190   %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
 191   %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
 192   %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
 193   %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef)
 194   ret void
 195 }
 196
 197 define void @reduce_fmax16() {
     ; Half-precision maximum reductions over 2, 4, 8 and 16 lanes, for both
     ; llvm.vector.reduce.fmax and llvm.vector.reduce.fmaximum (the "m"-suffixed
     ; values). Expectations are split per RUN configuration: 11/50/174/240 for
     ; the run without -mattr=+fullfp16 and 2/2/2/3 for the run with it,
     ; identically for both intrinsics.
 198 ; CHECK-NOF16-LABEL: 'reduce_fmax16'
 199 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
 200 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
 201 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
 202 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
 203 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
 204 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
 205 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
 206 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
 207 ; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 208 ;
 209 ; CHECK-F16-LABEL: 'reduce_fmax16'
 210 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
 211 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
 212 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
 213 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
 214 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
 215 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
 216 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
 217 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
 218 ; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 219 ;
 220   %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
 221   %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
 222   %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
 223   %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
 224   %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
 225   %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
 226   %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
 227   %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef)
 228   ret void
 229 }
 230
 231 define void @reduce_fmin() {
     ; Single- and double-precision minimum reductions: float vectors of 2, 4
     ; and 8 lanes and double vectors of 2 and 4 lanes, for both
     ; llvm.vector.reduce.fmin and llvm.vector.reduce.fminimum (the "m"-suffixed
     ; values). Both RUN configurations share these expectations.
 232 ; CHECK-LABEL: 'reduce_fmin'
 233 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
 234 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
 235 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
 236 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
 237 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
 238 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
 239 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
 240 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
 241 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
 242 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
 243 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 244 ;
 245   %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
 246   %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
 247   %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
 248   %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
 249   %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
 250   %V2f32m = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
 251   %V4f32m = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> undef)
 252   %V8f32m = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> undef)
 253   %V2f64m = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
 254   %V4f64m = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> undef)
 255   ret void
 256 }
 257
 258 define void @reduce_fmax() {
     ; Single- and double-precision maximum reductions: float vectors of 2, 4
     ; and 8 lanes and double vectors of 2 and 4 lanes, for both
     ; llvm.vector.reduce.fmax and llvm.vector.reduce.fmaximum (the "m"-suffixed
     ; values). Both RUN configurations share these expectations.
 259 ; CHECK-LABEL: 'reduce_fmax'
 260 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
 261 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
 262 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
 263 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
 264 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
 265 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
 266 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
 267 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
 268 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
 269 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
 270 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 271 ;
 272   %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
 273   %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
 274   %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
 275   %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
 276   %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
 277   %V2f32m = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
 278   %V4f32m = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> undef)
 279   %V8f32m = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> undef)
 280   %V2f64m = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
 281   %V4f64m = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> undef)
 282   ret void
 283 }
 284
 285 declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>) ; umin declarations: one per vector type called in @reduce_umin
 286 declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
 287 declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
 288 declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
 289 declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
 290 declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
 291 declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
 292 declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
 293 declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
 294 declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
 295 declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
 296 declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
 297 declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
 298 declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
 299 declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
 300 declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
 301
 302 declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>) ; umax declarations: one per vector type called in @reduce_umax
 303 declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
 304 declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
 305 declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
 306 declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
 307 declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
 308 declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
 309 declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
 310 declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
 311 declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
 312 declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
 313 declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
 314 declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
 315 declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
 316 declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
 317 declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
 318
 319 declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>) ; smin declarations: one per vector type called in @reduce_smin
 320 declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
 321 declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
 322 declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
 323 declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
 324 declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
 325 declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
 326 declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
 327 declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
 328 declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
 329 declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
 330 declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
 331 declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
 332 declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
 333 declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
 334 declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
 335
 336 declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>) ; smax declarations: one per vector type called in @reduce_smax
 337 declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
 338 declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
 339 declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
 340 declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
 341 declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
 342 declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
 343 declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
 344 declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
 345 declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
 346 declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
 347 declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
 348 declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
 349 declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
 350 declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
 351 declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
 352
 353 declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>) ; fmin declarations: half variants for @reduce_fmin16, float/double variants for @reduce_fmin
 354 declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
 355 declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
 356 declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
 357 declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
 358 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
 359 declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
 360 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
 361 declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
 362
 363 declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>) ; fmax declarations: half variants for @reduce_fmax16, float/double variants for @reduce_fmax
 364 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
 365 declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
 366 declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
 367 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
 368 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
 369 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
 370 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
 371 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
 372
 373 declare half @llvm.vector.reduce.fminimum.v2f16(<2 x half>) ; fminimum declarations: half variants for @reduce_fmin16, float/double variants for @reduce_fmin
 374 declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
 375 declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
 376 declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
 377 declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
 378 declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
 379 declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
 380 declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
 381 declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)
 382
 383 declare half @llvm.vector.reduce.fmaximum.v2f16(<2 x half>) ; fmaximum declarations: half variants for @reduce_fmax16, float/double variants for @reduce_fmax
 384 declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
 385 declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
 386 declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
 387 declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
 388 declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
 389 declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
 390 declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
 391 declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)