llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput   < %s | FileCheck %s --check-prefix=THRU
   3 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency      < %s | FileCheck %s --check-prefix=LATE
   4 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size    < %s | FileCheck %s --check-prefix=SIZE
   5 ; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE
   6
   7 ; Test a cross-section of intrinsics for various cost-kinds.
   8 ; Other test files may check for accuracy of a particular intrinsic
   9 ; across subtargets or types. This is just a basic correctness check using the
  10 ; default x86 target and a legal scalar type (i32/float) and/or an
  11 ; illegal vector type (16 x i32/float).
  12
  13 declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
  14 declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
  15
  16 declare i32 @llvm.smax.i32(i32, i32)
  17 declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
  18
  19 declare float @llvm.fmuladd.f32(float, float, float)
  20 declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>)
  21
  22 declare float @llvm.log2.f32(float)
  23 declare <16 x float> @llvm.log2.v16f32(<16 x float>)
  24
  25 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
  26 declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
  27
  28 declare float @llvm.maximum.f32(float, float)
  29 declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
  30
  31 declare i32 @llvm.cttz.i32(i32, i1)
  32 declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
  33
  34 declare i32 @llvm.ctlz.i32(i32, i1)
  35 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
  36
  37 declare i32 @llvm.fshl.i32(i32, i32, i32)
  38 declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
  39
  40 declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
  41 declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
  42 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
  43 declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>)
  44 declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
  45
  46 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
  47
  48 define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
  49 ; THRU-LABEL: 'umul'
  50 ; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  51 ; THRU-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
  52 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  53 ;
  54 ; LATE-LABEL: 'umul'
  55 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  56 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
  57 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  58 ;
  59 ; SIZE-LABEL: 'umul'
  60 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  61 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
  62 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  63 ;
  64 ; SIZE_LATE-LABEL: 'umul'
  65 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  66 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
  67 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  68 ; Body under test: llvm.umul.with.overflow on scalar i32 (%s) and on <16 x i32> (%v); both results are unused, only the reported costs matter.
  69   %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  70   %v = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
  71   ret void
  72 }
  73
  74 define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
  75 ; THRU-LABEL: 'smax'
  76 ; THRU-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  77 ; THRU-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  78 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  79 ;
  80 ; LATE-LABEL: 'smax'
  81 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  82 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  83 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  84 ;
  85 ; SIZE-LABEL: 'smax'
  86 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  87 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  88 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  89 ;
  90 ; SIZE_LATE-LABEL: 'smax'
  91 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  92 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  93 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  94 ; Body under test: llvm.smax on scalar i32 (%s) and on <16 x i32> (%v); results are unused.
  95   %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  96   %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  97   ret void
  98 }
  99
 100 define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) {
 101 ; THRU-LABEL: 'fmuladd'
 102 ; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
 103 ; THRU-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
 104 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 105 ;
 106 ; LATE-LABEL: 'fmuladd'
 107 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
 108 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
 109 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 110 ;
 111 ; SIZE-LABEL: 'fmuladd'
 112 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
 113 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
 114 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 115 ;
 116 ; SIZE_LATE-LABEL: 'fmuladd'
 117 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
 118 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
 119 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 120 ; Body under test: llvm.fmuladd on scalar float (%s) and on <16 x float> (%v); results are unused.
 121   %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
 122   %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
 123   ret void
 124 }
 125
 126 define void @log2(float %a, <16 x float> %va) {
 127 ; THRU-LABEL: 'log2'
 128 ; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
 129 ; THRU-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
 130 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 131 ;
 132 ; LATE-LABEL: 'log2'
 133 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.log2.f32(float %a)
 134 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
 135 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 136 ;
 137 ; SIZE-LABEL: 'log2'
 138 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
 139 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
 140 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 141 ;
 142 ; SIZE_LATE-LABEL: 'log2'
 143 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
 144 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
 145 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 146 ; Body under test: llvm.log2 on scalar float (%s) and on <16 x float> (%v); results are unused.
 147   %s = call float @llvm.log2.f32(float %a)
 148   %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
 149   ret void
 150 }
 151
 152 define void @constrained_fadd(float %a, <16 x float> %va) {
 153 ; THRU-LABEL: 'constrained_fadd'
 154 ; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 155 ; THRU-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 156 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 157 ;
 158 ; LATE-LABEL: 'constrained_fadd'
 159 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 160 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 161 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 162 ;
 163 ; SIZE-LABEL: 'constrained_fadd'
 164 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 165 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 166 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 167 ;
 168 ; SIZE_LATE-LABEL: 'constrained_fadd'
 169 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 170 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 171 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 172 ; Body under test: constrained fadd of a value with itself (scalar float %s, <16 x float> %t) using "round.dynamic" / "fpexcept.ignore". NOTE(review): this function is not marked strictfp although it calls constrained FP intrinsics - confirm against LangRef whether the attribute should be added.
 173   %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 174   %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
 175   ret void
 176 }
 177
 178 define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
 179 ; THRU-LABEL: 'fmaximum'
 180 ; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
 181 ; THRU-NEXT:  Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 182 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 183 ;
 184 ; LATE-LABEL: 'fmaximum'
 185 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
 186 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 187 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 188 ;
 189 ; SIZE-LABEL: 'fmaximum'
 190 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
 191 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 192 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 193 ;
 194 ; SIZE_LATE-LABEL: 'fmaximum'
 195 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
 196 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 197 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 198 ; Body under test: llvm.maximum on scalar float (%s) and on <16 x float> (%v); results are unused.
 199   %s = call float @llvm.maximum.f32(float %a, float %b)
 200   %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 201   ret void
 202 }
 203
 204 define void @cttz(i32 %a, <16 x i32> %va) {
 205 ; THRU-LABEL: 'cttz'
 206 ; THRU-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 207 ; THRU-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 208 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 209 ;
 210 ; LATE-LABEL: 'cttz'
 211 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 212 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 213 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 214 ;
 215 ; SIZE-LABEL: 'cttz'
 216 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 217 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 218 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 219 ;
 220 ; SIZE_LATE-LABEL: 'cttz'
 221 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 222 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 223 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 224 ; Body under test: llvm.cttz on scalar i32 (%s) and on <16 x i32> (%v), both with the i1 flag operand set to false; results are unused.
 225   %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
 226   %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 227   ret void
 228 }
 229
 230 define void @ctlz(i32 %a, <16 x i32> %va) {
 231 ; THRU-LABEL: 'ctlz'
 232 ; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
 233 ; THRU-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 234 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 235 ;
 236 ; LATE-LABEL: 'ctlz'
 237 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
 238 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 239 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 240 ;
 241 ; SIZE-LABEL: 'ctlz'
 242 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
 243 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 244 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 245 ;
 246 ; SIZE_LATE-LABEL: 'ctlz'
 247 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
 248 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 249 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 250 ; Body under test: llvm.ctlz on scalar i32 (%s) and on <16 x i32> (%v), both with the i1 flag operand set to true (unlike @cttz, which passes false); results are unused.
 251   %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
 252   %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 253   ret void
 254 }
 255
 256 define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
 257 ; THRU-LABEL: 'fshl'
 258 ; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
 259 ; THRU-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 260 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 261 ;
 262 ; LATE-LABEL: 'fshl'
 263 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
 264 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 265 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 266 ;
 267 ; SIZE-LABEL: 'fshl'
 268 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
 269 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 270 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 271 ;
 272 ; SIZE_LATE-LABEL: 'fshl'
 273 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
 274 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 275 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 276 ; Body under test: llvm.fshl (funnel shift left) with three distinct, non-constant operands, on scalar i32 (%s) and on <16 x i32> (%v); results are unused.
 277   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
 278   %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 279   ret void
 280 }
 281
 282 define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) {
 283 ; THRU-LABEL: 'maskedgather'
 284 ; THRU-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
 285 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 286 ;
 287 ; LATE-LABEL: 'maskedgather'
 288 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
 289 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 290 ;
 291 ; SIZE-LABEL: 'maskedgather'
 292 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
 293 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 294 ;
 295 ; SIZE_LATE-LABEL: 'maskedgather'
 296 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
 297 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 298 ; Body under test: vector-only case - llvm.masked.gather of <16 x float> through the pointer vector %va, with i32 1 as the alignment operand and non-constant mask %vb; result is unused.
 299   %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
 300   ret void
 301 }
 302
 303 define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
 304 ; THRU-LABEL: 'maskedscatter'
 305 ; THRU-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
 306 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 307 ;
 308 ; LATE-LABEL: 'maskedscatter'
 309 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
 310 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 311 ;
 312 ; SIZE-LABEL: 'maskedscatter'
 313 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
 314 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 315 ;
 316 ; SIZE_LATE-LABEL: 'maskedscatter'
 317 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
 318 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 319 ; Body under test: vector-only case - llvm.masked.scatter of <16 x float> %va through the pointer vector %vb, with i32 1 as the alignment operand and non-constant mask %vc.
 320   call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
 321   ret void
 322 }
 323
 324 define void @reduce_fmax(<16 x float> %va) {
 325 ; THRU-LABEL: 'reduce_fmax'
 326 ; THRU-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
 327 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 328 ;
 329 ; LATE-LABEL: 'reduce_fmax'
 330 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
 331 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 332 ;
 333 ; SIZE-LABEL: 'reduce_fmax'
 334 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
 335 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 336 ;
 337 ; SIZE_LATE-LABEL: 'reduce_fmax'
 338 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
 339 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 340 ; Body under test: llvm.vector.reduce.fmax reducing <16 x float> %va to a scalar float; the call carries no fast-math flags and the result is unused.
 341   %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
 342   ret void
 343 }
 344
 345 define void @reduce_fmul(<16 x float> %va) {
 346 ; THRU-LABEL: 'reduce_fmul'
 347 ; THRU-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
 348 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 349 ;
 350 ; LATE-LABEL: 'reduce_fmul'
 351 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
 352 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 353 ;
 354 ; SIZE-LABEL: 'reduce_fmul'
 355 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
 356 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 357 ;
 358 ; SIZE_LATE-LABEL: 'reduce_fmul'
 359 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
 360 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 361 ; Body under test: llvm.vector.reduce.fmul over <16 x float> %va with constant start value 42.0 and no fast-math flags on the call (compare @reduce_fadd_fast); result is unused.
 362   %v = call float @llvm.vector.reduce.fmul.v16f32(float 42.0, <16 x float> %va)
 363   ret void
 364 }
 365
 366 define void @reduce_fadd_fast(<16 x float> %va) {
 367 ; THRU-LABEL: 'reduce_fadd_fast'
 368 ; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
 369 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 370 ;
 371 ; LATE-LABEL: 'reduce_fadd_fast'
 372 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
 373 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 374 ;
 375 ; SIZE-LABEL: 'reduce_fadd_fast'
 376 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
 377 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 378 ;
 379 ; SIZE_LATE-LABEL: 'reduce_fadd_fast'
 380 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
 381 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 382 ; Body under test: llvm.vector.reduce.fadd over <16 x float> %va with constant start value 0.0; the call carries the 'fast' flag, which is what the _fast suffix refers to. Result is unused.
 383   %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %va)
 384   ret void
 385 }
 386
 387 define void @memcpy(i8* %a, i8* %b, i32 %c) {
 388 ; THRU-LABEL: 'memcpy'
 389 ; THRU-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
 390 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 391 ;
 392 ; LATE-LABEL: 'memcpy'
 393 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
 394 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 395 ;
 396 ; SIZE-LABEL: 'memcpy'
 397 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
 398 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 399 ;
 400 ; SIZE_LATE-LABEL: 'memcpy'
 401 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
 402 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 403 ; Body under test: llvm.memcpy from %b to %a with a constant length of 32, align 1 on both pointers and the trailing i1 operand false. Parameter %c is not used by the body.
 404   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
 405   ret void
 406 }