llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput   < %s | FileCheck %s --check-prefix=THRU
   3 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency      < %s | FileCheck %s --check-prefix=LATE
   4 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size    < %s | FileCheck %s --check-prefix=SIZE
   5 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE
   6
   7 ; Test a cross-section of intrinsics for various cost-kinds.
   8 ; Other test files may check for accuracy of a particular intrinsic
   9 ; across subtargets or types. This is just a basic correctness check using the
  10 ; default x86 target and a legal scalar type (i32/float) and/or an
  11 ; illegal vector type (16 x i32/float).
  12
  13 declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
  14 declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
  15
  16 declare i32 @llvm.smax.i32(i32, i32)
  17 declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
  18
  19 declare float @llvm.copysign.f32(float, float)
  20 declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
  21
  22 declare float @llvm.fmuladd.f32(float, float, float)
  23 declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>)
  24
  25 declare float @llvm.log2.f32(float)
  26 declare <16 x float> @llvm.log2.v16f32(<16 x float>)
  27
  28 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
  29 declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
  30
  31 declare float @llvm.maximum.f32(float, float)
  32 declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
  33
  34 declare i32 @llvm.cttz.i32(i32, i1)
  35 declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
  36
  37 declare i32 @llvm.ctlz.i32(i32, i1)
  38 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
  39
  40 declare i32 @llvm.fshl.i32(i32, i32, i32)
  41 declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
  42
  43 declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
  44 declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
  45 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
  46 declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>)
  47 declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
  48
  49 declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
  50
   51 define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
   52 ; THRU-LABEL: 'umul'
   53 ; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
   54 ; THRU-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
   55 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
   56 ;
   57 ; LATE-LABEL: 'umul'
   58 ; LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
   59 ; LATE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
   60 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
   61 ;
   62 ; SIZE-LABEL: 'umul'
   63 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
   64 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
   65 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
   66 ;
   67 ; SIZE_LATE-LABEL: 'umul'
   68 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
   69 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
   70 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
   71 ;
        ; Calls under test - llvm.umul.with.overflow on scalar i32, then on <16 x i32>.
        ; Neither result is used; the checks above pin the cost printed for each instruction.
   72   %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
   73   %v = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
   74   ret void
   75 }
  76
   77 define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
   78 ; THRU-LABEL: 'smax'
   79 ; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   80 ; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
   81 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
   82 ;
   83 ; LATE-LABEL: 'smax'
   84 ; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   85 ; LATE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
   86 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
   87 ;
   88 ; SIZE-LABEL: 'smax'
   89 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   90 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
   91 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
   92 ;
   93 ; SIZE_LATE-LABEL: 'smax'
   94 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   95 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
   96 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
   97 ;
        ; Calls under test - llvm.smax on scalar i32, then on <16 x i32>.
        ; Both results are dead; only the per-instruction costs checked above matter.
   98   %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   99   %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  100   ret void
  101 }
 102
  103 define void @fcopysign(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
  104 ; THRU-LABEL: 'fcopysign'
  105 ; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
  106 ; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
  107 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  108 ;
  109 ; LATE-LABEL: 'fcopysign'
  110 ; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
  111 ; LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
  112 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  113 ;
  114 ; SIZE-LABEL: 'fcopysign'
  115 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
  116 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
  117 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  118 ;
  119 ; SIZE_LATE-LABEL: 'fcopysign'
  120 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
  121 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
  122 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  123 ;
        ; Calls under test - llvm.copysign on scalar float, then on <16 x float>.
        ; The results are intentionally unused.
  124   %s = call float @llvm.copysign.f32(float %a, float %b)
  125   %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
  126   ret void
  127 }
 128
  129 define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) {
  130 ; THRU-LABEL: 'fmuladd'
  131 ; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  132 ; THRU-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
  133 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  134 ;
  135 ; LATE-LABEL: 'fmuladd'
  136 ; LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  137 ; LATE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
  138 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  139 ;
  140 ; SIZE-LABEL: 'fmuladd'
  141 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  142 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
  143 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  144 ;
  145 ; SIZE_LATE-LABEL: 'fmuladd'
  146 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  147 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
  148 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  149 ;
        ; Calls under test - three-operand llvm.fmuladd on scalar float, then on <16 x float>.
        ; The results are intentionally unused.
  150   %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  151   %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
  152   ret void
  153 }
 154
  155 define void @log2(float %a, <16 x float> %va) {
  156 ; THRU-LABEL: 'log2'
  157 ; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
  158 ; THRU-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
  159 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  160 ;
  161 ; LATE-LABEL: 'log2'
  162 ; LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
  163 ; LATE-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
  164 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  165 ;
  166 ; SIZE-LABEL: 'log2'
  167 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
  168 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
  169 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  170 ;
  171 ; SIZE_LATE-LABEL: 'log2'
  172 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
  173 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
  174 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  175 ;
        ; Calls under test - unary llvm.log2 on scalar float, then on <16 x float>.
        ; The results are intentionally unused.
  176   %s = call float @llvm.log2.f32(float %a)
  177   %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
  178   ret void
  179 }
 180
  181 define void @constrained_fadd(float %a, <16 x float> %va) strictfp {
  182 ; THRU-LABEL: 'constrained_fadd'
  183 ; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  184 ; THRU-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  185 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  186 ;
  187 ; LATE-LABEL: 'constrained_fadd'
  188 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  189 ; LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  190 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  191 ;
  192 ; SIZE-LABEL: 'constrained_fadd'
  193 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  194 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  195 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  196 ;
  197 ; SIZE_LATE-LABEL: 'constrained_fadd'
  198 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  199 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  200 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  201 ;
        ; Calls under test - constrained fadd on scalar float, then on <16 x float>, inside a
        ; strictfp function. Each call adds its input to itself and passes "round.dynamic" /
        ; "fpexcept.ignore" metadata. The vector result is named %t here, not %v.
  202   %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  203   %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  204   ret void
  205 }
 206
  207 define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
  208 ; THRU-LABEL: 'fmaximum'
  209 ; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
  210 ; THRU-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
  211 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  212 ;
  213 ; LATE-LABEL: 'fmaximum'
  214 ; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
  215 ; LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
  216 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  217 ;
  218 ; SIZE-LABEL: 'fmaximum'
  219 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
  220 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
  221 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  222 ;
  223 ; SIZE_LATE-LABEL: 'fmaximum'
  224 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
  225 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
  226 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  227 ;
        ; Calls under test - llvm.maximum on scalar float, then on <16 x float>.
        ; The results are intentionally unused.
  228   %s = call float @llvm.maximum.f32(float %a, float %b)
  229   %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
  230   ret void
  231 }
 232
  233 define void @cttz(i32 %a, <16 x i32> %va) {
  234 ; THRU-LABEL: 'cttz'
  235 ; THRU-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  236 ; THRU-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
  237 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  238 ;
  239 ; LATE-LABEL: 'cttz'
  240 ; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  241 ; LATE-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
  242 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  243 ;
  244 ; SIZE-LABEL: 'cttz'
  245 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  246 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
  247 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  248 ;
  249 ; SIZE_LATE-LABEL: 'cttz'
  250 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  251 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
  252 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  253 ;
        ; Calls under test - llvm.cttz on scalar i32, then on <16 x i32>.
        ; The i1 operand is false in both calls (presumably the zero-input-is-poison flag;
        ; confirm against the LangRef).
  254   %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  255   %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
  256   ret void
  257 }
 258
  259 define void @ctlz(i32 %a, <16 x i32> %va) {
  260 ; THRU-LABEL: 'ctlz'
  261 ; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  262 ; THRU-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
  263 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  264 ;
  265 ; LATE-LABEL: 'ctlz'
  266 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  267 ; LATE-NEXT:  Cost Model: Found an estimated cost of 180 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
  268 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  269 ;
  270 ; SIZE-LABEL: 'ctlz'
  271 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  272 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
  273 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  274 ;
  275 ; SIZE_LATE-LABEL: 'ctlz'
  276 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  277 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
  278 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  279 ;
        ; Calls under test - llvm.ctlz on scalar i32, then on <16 x i32>.
        ; The i1 operand is true in both calls (presumably the zero-input-is-poison flag;
        ; confirm against the LangRef).
  280   %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  281   %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
  282   ret void
  283 }
 284
  285 define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
  286 ; THRU-LABEL: 'fshl'
  287 ; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
  288 ; THRU-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
  289 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  290 ;
  291 ; LATE-LABEL: 'fshl'
  292 ; LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
  293 ; LATE-NEXT:  Cost Model: Found an estimated cost of 145 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
  294 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  295 ;
  296 ; SIZE-LABEL: 'fshl'
  297 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
  298 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
  299 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  300 ;
  301 ; SIZE_LATE-LABEL: 'fshl'
  302 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
  303 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 149 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
  304 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  305 ;
        ; Calls under test - three-operand llvm.fshl on scalar i32, then on <16 x i32>.
        ; All three operands are function arguments, so none is a compile-time constant.
  306   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
  307   %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
  308   ret void
  309 }
 310
  311 define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) {
  312 ; THRU-LABEL: 'maskedgather'
  313 ; THRU-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
  314 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  315 ;
  316 ; LATE-LABEL: 'maskedgather'
  317 ; LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
  318 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  319 ;
  320 ; SIZE-LABEL: 'maskedgather'
  321 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
  322 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  323 ;
  324 ; SIZE_LATE-LABEL: 'maskedgather'
  325 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
  326 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  327 ;
        ; Call under test - vector-only llvm.masked.gather producing <16 x float> from a
        ; <16 x ptr>. The remaining operands are the constant i32 1, the <16 x i1> %vb and the
        ; <16 x float> %vc (presumably alignment, mask and pass-through; confirm in the LangRef).
  328   %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
  329   ret void
  330 }
 331
  332 define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
  333 ; THRU-LABEL: 'maskedscatter'
  334 ; THRU-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
  335 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  336 ;
  337 ; LATE-LABEL: 'maskedscatter'
  338 ; LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
  339 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  340 ;
  341 ; SIZE-LABEL: 'maskedscatter'
  342 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
  343 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  344 ;
  345 ; SIZE_LATE-LABEL: 'maskedscatter'
  346 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
  347 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  348 ;
        ; Call under test - vector-only llvm.masked.scatter of a <16 x float> through a
        ; <16 x ptr>. The remaining operands are the constant i32 1 and the <16 x i1> %vc
        ; (presumably alignment and mask; confirm in the LangRef). The call returns void.
  349   call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
  350   ret void
  351 }
 352
  353 define void @reduce_fmax(<16 x float> %va) {
  354 ; THRU-LABEL: 'reduce_fmax'
  355 ; THRU-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
  356 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  357 ;
  358 ; LATE-LABEL: 'reduce_fmax'
  359 ; LATE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
  360 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  361 ;
  362 ; SIZE-LABEL: 'reduce_fmax'
  363 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
  364 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  365 ;
  366 ; SIZE_LATE-LABEL: 'reduce_fmax'
  367 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
  368 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  369 ;
        ; Call under test - llvm.vector.reduce.fmax folding a <16 x float> into one float.
        ; The call carries no fast-math flags and takes no start value.
  370   %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
  371   ret void
  372 }
 373
  374 define void @reduce_fmul(<16 x float> %va) {
  375 ; THRU-LABEL: 'reduce_fmul'
  376 ; THRU-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
  377 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  378 ;
  379 ; LATE-LABEL: 'reduce_fmul'
  380 ; LATE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
  381 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  382 ;
  383 ; SIZE-LABEL: 'reduce_fmul'
  384 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
  385 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  386 ;
  387 ; SIZE_LATE-LABEL: 'reduce_fmul'
  388 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
  389 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  390 ;
        ; Call under test - llvm.vector.reduce.fmul over a <16 x float> with the constant
        ; start value 42.0 (printed as 4.200000e+01 in the checks) and no fast-math flags.
  391   %v = call float @llvm.vector.reduce.fmul.v16f32(float 42.0, <16 x float> %va)
  392   ret void
  393 }
 394
  395 define void @reduce_fadd_fast(<16 x float> %va) {
  396 ; THRU-LABEL: 'reduce_fadd_fast'
  397 ; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
  398 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  399 ;
  400 ; LATE-LABEL: 'reduce_fadd_fast'
  401 ; LATE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
  402 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  403 ;
  404 ; SIZE-LABEL: 'reduce_fadd_fast'
  405 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
  406 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  407 ;
  408 ; SIZE_LATE-LABEL: 'reduce_fadd_fast'
  409 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
  410 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  411 ;
        ; Call under test - llvm.vector.reduce.fadd over a <16 x float> with start value 0.0.
        ; Unlike the fmul reduction in this file, this call carries the 'fast' flag.
  412   %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %va)
  413   ret void
  414 }
 415
  416 define void @memcpy(ptr %a, ptr %b, i32 %c) {
  417 ; THRU-LABEL: 'memcpy'
  418 ; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
  419 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  420 ;
  421 ; LATE-LABEL: 'memcpy'
  422 ; LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
  423 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  424 ;
  425 ; SIZE-LABEL: 'memcpy'
  426 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
  427 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  428 ;
  429 ; SIZE_LATE-LABEL: 'memcpy'
  430 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
  431 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  432 ;
        ; Call under test - llvm.memcpy between two align-1 pointers with the constant
        ; length i32 32 and a final i1 false operand. The %c argument is never referenced.
  433   call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
  434   ret void
  435 }