llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1
   3 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
   4 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
   5 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
   6 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
   7 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
   8 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
   9 ;
  10 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
  11 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
  12 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
  13
  14 define i32 @fadd(i32 %arg) {
  15 ; SSE1-LABEL: 'fadd'
  16 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fadd float undef, undef
  17 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fadd <4 x float> undef, undef
  18 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fadd <8 x float> undef, undef
  19 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fadd <16 x float> undef, undef
  20 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fadd double undef, undef
  21 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fadd <2 x double> undef, undef
  22 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fadd <4 x double> undef, undef
  23 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fadd <8 x double> undef, undef
  24 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  25 ;
  26 ; SSE2-LABEL: 'fadd'
  27 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fadd float undef, undef
  28 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fadd <4 x float> undef, undef
  29 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fadd <8 x float> undef, undef
  30 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fadd <16 x float> undef, undef
  31 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fadd double undef, undef
  32 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fadd <2 x double> undef, undef
  33 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fadd <4 x double> undef, undef
  34 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fadd <8 x double> undef, undef
  35 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  36 ;
  37 ; SSE42-LABEL: 'fadd'
  38 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fadd float undef, undef
  39 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fadd <4 x float> undef, undef
  40 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fadd <8 x float> undef, undef
  41 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fadd <16 x float> undef, undef
  42 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fadd double undef, undef
  43 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fadd <2 x double> undef, undef
  44 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fadd <4 x double> undef, undef
  45 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fadd <8 x double> undef, undef
  46 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  47 ;
  48 ; AVX1-LABEL: 'fadd'
  49 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fadd float undef, undef
  50 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fadd <4 x float> undef, undef
  51 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fadd <8 x float> undef, undef
  52 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fadd <16 x float> undef, undef
  53 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fadd double undef, undef
  54 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fadd <2 x double> undef, undef
  55 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fadd <4 x double> undef, undef
  56 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fadd <8 x double> undef, undef
  57 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  58 ;
  59 ; AVX2-LABEL: 'fadd'
  60 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fadd float undef, undef
  61 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fadd <4 x float> undef, undef
  62 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
  63 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> undef, undef
  64 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fadd double undef, undef
  65 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fadd <2 x double> undef, undef
  66 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
  67 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> undef, undef
  68 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  69 ;
  70 ; AVX512-LABEL: 'fadd'
  71 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fadd float undef, undef
  72 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fadd <4 x float> undef, undef
  73 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
  74 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef
  75 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fadd double undef, undef
  76 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fadd <2 x double> undef, undef
  77 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
  78 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef
  79 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  80 ;
  81 ; SLM-LABEL: 'fadd'
  82 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fadd float undef, undef
  83 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fadd <4 x float> undef, undef
  84 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fadd <8 x float> undef, undef
  85 ; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fadd <16 x float> undef, undef
  86 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fadd double undef, undef
  87 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fadd <2 x double> undef, undef
  88 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fadd <4 x double> undef, undef
  89 ; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fadd <8 x double> undef, undef
  90 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  91 ;
  92 ; GLM-LABEL: 'fadd'
  93 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fadd float undef, undef
  94 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fadd <4 x float> undef, undef
  95 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fadd <8 x float> undef, undef
  96 ; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fadd <16 x float> undef, undef
  97 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fadd double undef, undef
  98 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fadd <2 x double> undef, undef
  99 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fadd <4 x double> undef, undef
 100 ; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fadd <8 x double> undef, undef
 101 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 102 ;
 103   %F32 = fadd float undef, undef
 104   %V4F32 = fadd <4 x float> undef, undef
 105   %V8F32 = fadd <8 x float> undef, undef
 106   %V16F32 = fadd <16 x float> undef, undef
 107
 108   %F64 = fadd double undef, undef
 109   %V2F64 = fadd <2 x double> undef, undef
 110   %V4F64 = fadd <4 x double> undef, undef
 111   %V8F64 = fadd <8 x double> undef, undef
 112
 113   ret i32 undef
 114 }
 115
 116 define i32 @fsub(i32 %arg) {
 117 ; SSE1-LABEL: 'fsub'
 118 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float undef, undef
 119 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> undef, undef
 120 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> undef, undef
 121 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> undef, undef
 122 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double undef, undef
 123 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> undef, undef
 124 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> undef, undef
 125 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> undef, undef
 126 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 127 ;
 128 ; SSE2-LABEL: 'fsub'
 129 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float undef, undef
 130 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> undef, undef
 131 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> undef, undef
 132 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> undef, undef
 133 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double undef, undef
 134 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> undef, undef
 135 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> undef, undef
 136 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> undef, undef
 137 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 138 ;
 139 ; SSE42-LABEL: 'fsub'
 140 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float undef, undef
 141 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> undef, undef
 142 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> undef, undef
 143 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> undef, undef
 144 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double undef, undef
 145 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> undef, undef
 146 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> undef, undef
 147 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> undef, undef
 148 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 149 ;
 150 ; AVX1-LABEL: 'fsub'
 151 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fsub float undef, undef
 152 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> undef, undef
 153 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> undef, undef
 154 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> undef, undef
 155 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fsub double undef, undef
 156 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> undef, undef
 157 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> undef, undef
 158 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> undef, undef
 159 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 160 ;
 161 ; AVX2-LABEL: 'fsub'
 162 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float undef, undef
 163 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> undef, undef
 164 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
 165 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> undef, undef
 166 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double undef, undef
 167 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> undef, undef
 168 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
 169 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> undef, undef
 170 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 171 ;
 172 ; AVX512-LABEL: 'fsub'
 173 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float undef, undef
 174 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> undef, undef
 175 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
 176 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef
 177 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double undef, undef
 178 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> undef, undef
 179 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
 180 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef
 181 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 182 ;
 183 ; SLM-LABEL: 'fsub'
 184 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float undef, undef
 185 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> undef, undef
 186 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> undef, undef
 187 ; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> undef, undef
 188 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double undef, undef
 189 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> undef, undef
 190 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> undef, undef
 191 ; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> undef, undef
 192 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 193 ;
 194 ; GLM-LABEL: 'fsub'
 195 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float undef, undef
 196 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> undef, undef
 197 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> undef, undef
 198 ; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> undef, undef
 199 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double undef, undef
 200 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> undef, undef
 201 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> undef, undef
 202 ; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> undef, undef
 203 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 204 ;
 205   %F32 = fsub float undef, undef
 206   %V4F32 = fsub <4 x float> undef, undef
 207   %V8F32 = fsub <8 x float> undef, undef
 208   %V16F32 = fsub <16 x float> undef, undef
 209
 210   %F64 = fsub double undef, undef
 211   %V2F64 = fsub <2 x double> undef, undef
 212   %V4F64 = fsub <4 x double> undef, undef
 213   %V8F64 = fsub <8 x double> undef, undef
 214
 215   ret i32 undef
 216 }
 217
 218 define i32 @fneg_idiom(i32 %arg) {
 219 ; SSE1-LABEL: 'fneg_idiom'
 220 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
 221 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 222 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 223 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 224 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
 225 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 226 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 227 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 228 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 229 ;
 230 ; SSE2-LABEL: 'fneg_idiom'
 231 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
 232 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 233 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 234 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 235 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
 236 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 237 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 238 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 239 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 240 ;
 241 ; SSE42-LABEL: 'fneg_idiom'
 242 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
 243 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 244 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 245 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 246 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
 247 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 248 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 249 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 250 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 251 ;
 252 ; AVX1-LABEL: 'fneg_idiom'
 253 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fsub float -0.000000e+00, undef
 254 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 255 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 256 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 257 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fsub double -0.000000e+00, undef
 258 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 259 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 260 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 261 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 262 ;
 263 ; AVX2-LABEL: 'fneg_idiom'
 264 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef
 265 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 266 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 267 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 268 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef
 269 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 270 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 271 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 272 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 273 ;
 274 ; AVX512-LABEL: 'fneg_idiom'
 275 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef
 276 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 277 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 278 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 279 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef
 280 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 281 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 282 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 283 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 284 ;
 285 ; SLM-LABEL: 'fneg_idiom'
 286 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
 287 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 288 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 289 ; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 290 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
 291 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 292 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 293 ; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 294 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 295 ;
 296 ; GLM-LABEL: 'fneg_idiom'
 297 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
 298 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 299 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 300 ; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
 301 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
 302 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
 303 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 304 ; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
 305 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 306 ;
 307   %F32 = fsub float -0.0, undef
 308   %V4F32 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, undef
 309   %V8F32 = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, undef
 310   %V16F32 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, undef
 311
 312   %F64 = fsub double -0.0, undef
 313   %V2F64 = fsub <2 x double> <double -0.0, double -0.0>, undef
 314   %V4F64 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, undef
 315   %V8F64 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, undef
 316
 317   ret i32 undef
 318 }
 319
 320 define i32 @fneg(i32 %arg) {
 321 ; SSE1-LABEL: 'fneg'
 322 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef
 323 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef
 324 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
 325 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef
 326 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef
 327 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
 328 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef
 329 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef
 330 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 331 ;
 332 ; SSE2-LABEL: 'fneg'
 333 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 334 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 335 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 336 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 337 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 338 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 339 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 340 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 341 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 342 ;
 343 ; SSE42-LABEL: 'fneg'
 344 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 345 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 346 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 347 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 348 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 349 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 350 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 351 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 352 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 353 ;
 354 ; AVX1-LABEL: 'fneg'
 355 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 356 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 357 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 358 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 359 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 360 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 361 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 362 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 363 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 364 ;
 365 ; AVX2-LABEL: 'fneg'
 366 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 367 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 368 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef
 369 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fneg <16 x float> undef
 370 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 371 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 372 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef
 373 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fneg <8 x double> undef
 374 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 375 ;
 376 ; AVX512-LABEL: 'fneg'
 377 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 378 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 379 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef
 380 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fneg <16 x float> undef
 381 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 382 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 383 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef
 384 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fneg <8 x double> undef
 385 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 386 ;
 387 ; SLM-LABEL: 'fneg'
 388 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 389 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 390 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 391 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 392 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 393 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 394 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 395 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 396 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 397 ;
 398 ; GLM-LABEL: 'fneg'
 399 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
 400 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
 401 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 402 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
 403 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
 404 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 405 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
 406 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
 407 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 408 ;
 409   %F32 = fneg float undef
 410   %V4F32 = fneg <4 x float> undef
 411   %V8F32 = fneg <8 x float> undef
 412   %V16F32 = fneg <16 x float> undef
 413
 414   %F64 = fneg double undef
 415   %V2F64 = fneg <2 x double> undef
 416   %V4F64 = fneg <4 x double> undef
 417   %V8F64 = fneg <8 x double> undef
 418
 419   ret i32 undef
 420 }
 421
 422 define i32 @fmul(i32 %arg) {
 423 ; SSE1-LABEL: 'fmul'
 424 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef
 425 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef
 426 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef
 427 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef
 428 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef
 429 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = fmul <2 x double> undef, undef
 430 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = fmul <4 x double> undef, undef
 431 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8F64 = fmul <8 x double> undef, undef
 432 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 433 ;
 434 ; SSE2-LABEL: 'fmul'
 435 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef
 436 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef
 437 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef
 438 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef
 439 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef
 440 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef
 441 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fmul <4 x double> undef, undef
 442 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fmul <8 x double> undef, undef
 443 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 444 ;
 445 ; SSE42-LABEL: 'fmul'
 446 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef
 447 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef
 448 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef
 449 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef
 450 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef
 451 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef
 452 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fmul <4 x double> undef, undef
 453 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fmul <8 x double> undef, undef
 454 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 455 ;
 456 ; AVX-LABEL: 'fmul'
 457 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef
 458 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef
 459 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fmul <8 x float> undef, undef
 460 ; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fmul <16 x float> undef, undef
 461 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef
 462 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef
 463 ; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fmul <4 x double> undef, undef
 464 ; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fmul <8 x double> undef, undef
 465 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 466 ;
 467 ; AVX512-LABEL: 'fmul'
 468 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fmul float undef, undef
 469 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fmul <4 x float> undef, undef
 470 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef
 471 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef
 472 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fmul double undef, undef
 473 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fmul <2 x double> undef, undef
 474 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef
 475 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef
 476 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 477 ;
 478 ; SLM-LABEL: 'fmul'
 479 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fmul float undef, undef
 480 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef
 481 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef
 482 ; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef
 483 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef
 484 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = fmul <2 x double> undef, undef
 485 ; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = fmul <4 x double> undef, undef
 486 ; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = fmul <8 x double> undef, undef
 487 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 488 ;
 489 ; GLM-LABEL: 'fmul'
 490 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fmul float undef, undef
 491 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fmul <4 x float> undef, undef
 492 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fmul <8 x float> undef, undef
 493 ; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fmul <16 x float> undef, undef
 494 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fmul double undef, undef
 495 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fmul <2 x double> undef, undef
 496 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fmul <4 x double> undef, undef
 497 ; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fmul <8 x double> undef, undef
 498 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 499 ;
 500   %F32 = fmul float undef, undef
 501   %V4F32 = fmul <4 x float> undef, undef
 502   %V8F32 = fmul <8 x float> undef, undef
 503   %V16F32 = fmul <16 x float> undef, undef
 504
 505   %F64 = fmul double undef, undef
 506   %V2F64 = fmul <2 x double> undef, undef
 507   %V4F64 = fmul <4 x double> undef, undef
 508   %V8F64 = fmul <8 x double> undef, undef
 509
 510   ret i32 undef
 511 }
 512
 513 define i32 @fdiv(i32 %arg) {
 514 ; SSE1-LABEL: 'fdiv'
 515 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %F32 = fdiv float undef, undef
 516 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 517 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 518 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 519 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
 520 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 521 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 522 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 304 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 523 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 524 ;
 525 ; SSE2-LABEL: 'fdiv'
 526 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %F32 = fdiv float undef, undef
 527 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 528 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 529 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 530 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
 531 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 532 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 533 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 534 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 535 ;
 536 ; SSE42-LABEL: 'fdiv'
 537 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
 538 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 539 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 540 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 541 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
 542 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 543 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 544 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 545 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 546 ;
 547 ; AVX1-LABEL: 'fdiv'
 548 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
 549 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 550 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 551 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 552 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
 553 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 554 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 555 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 556 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 557 ;
 558 ; AVX2-LABEL: 'fdiv'
 559 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %F32 = fdiv float undef, undef
 560 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 561 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 562 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 563 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %F64 = fdiv double undef, undef
 564 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 565 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 566 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 567 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 568 ;
 569 ; AVX512-LABEL: 'fdiv'
 570 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %F32 = fdiv float undef, undef
 571 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 572 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 573 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 574 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F64 = fdiv double undef, undef
 575 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 576 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 577 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 578 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 579 ;
 580 ; SLM-LABEL: 'fdiv'
 581 ; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
 582 ; SLM-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 583 ; SLM-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 584 ; SLM-NEXT:  Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 585 ; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
 586 ; SLM-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 587 ; SLM-NEXT:  Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 588 ; SLM-NEXT:  Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 589 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 590 ;
 591 ; GLM-LABEL: 'fdiv'
 592 ; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
 593 ; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4F32 = fdiv <4 x float> undef, undef
 594 ; GLM-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8F32 = fdiv <8 x float> undef, undef
 595 ; GLM-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V16F32 = fdiv <16 x float> undef, undef
 596 ; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
 597 ; GLM-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V2F64 = fdiv <2 x double> undef, undef
 598 ; GLM-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V4F64 = fdiv <4 x double> undef, undef
 599 ; GLM-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V8F64 = fdiv <8 x double> undef, undef
 600 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 601 ;
 602   %F32 = fdiv float undef, undef
 603   %V4F32 = fdiv <4 x float> undef, undef
 604   %V8F32 = fdiv <8 x float> undef, undef
 605   %V16F32 = fdiv <16 x float> undef, undef
 606
 607   %F64 = fdiv double undef, undef
 608   %V2F64 = fdiv <2 x double> undef, undef
 609   %V4F64 = fdiv <4 x double> undef, undef
 610   %V8F64 = fdiv <8 x double> undef, undef
 611
 612   ret i32 undef
 613 }
 614
 615 define i32 @frem(i32 %arg) {
 616 ; CHECK-LABEL: 'frem'
 617 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef
 618 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef
 619 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef
 620 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef
 621 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef
 622 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef
 623 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef
 624 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef
 625 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 626 ;
 627   %F32 = frem float undef, undef
 628   %V4F32 = frem <4 x float> undef, undef
 629   %V8F32 = frem <8 x float> undef, undef
 630   %V16F32 = frem <16 x float> undef, undef
 631
 632   %F64 = frem double undef, undef
 633   %V2F64 = frem <2 x double> undef, undef
 634   %V4F64 = frem <4 x double> undef, undef
 635   %V8F64 = frem <8 x double> undef, undef
 636
 637   ret i32 undef
 638 }
 639
 640 define i32 @fsqrt(i32 %arg) {
 641 ; SSE1-LABEL: 'fsqrt'
 642 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 643 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 644 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 645 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 646 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 647 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 648 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 649 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 650 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 651 ;
 652 ; SSE2-LABEL: 'fsqrt'
 653 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 654 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 655 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 656 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 657 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 658 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 659 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 660 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 661 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 662 ;
 663 ; SSE42-LABEL: 'fsqrt'
 664 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 665 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 666 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 667 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 668 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 669 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 670 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 671 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 672 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 673 ;
 674 ; AVX1-LABEL: 'fsqrt'
 675 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 676 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 677 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 678 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 679 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 680 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 681 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 682 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 683 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 684 ;
 685 ; AVX2-LABEL: 'fsqrt'
 686 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 687 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 688 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 689 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 690 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 691 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 692 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 693 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 694 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 695 ;
 696 ; AVX512-LABEL: 'fsqrt'
 697 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 698 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 699 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 700 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 701 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 702 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 703 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 704 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 705 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 706 ;
 707 ; SLM-LABEL: 'fsqrt'
 708 ; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 709 ; SLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 710 ; SLM-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 711 ; SLM-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 712 ; SLM-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 713 ; SLM-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 714 ; SLM-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 715 ; SLM-NEXT:  Cost Model: Found an estimated cost of 284 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 716 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 717 ;
 718 ; GLM-LABEL: 'fsqrt'
 719 ; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
 720 ; GLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 721 ; GLM-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 722 ; GLM-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 723 ; GLM-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 724 ; GLM-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 725 ; GLM-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 726 ; GLM-NEXT:  Cost Model: Found an estimated cost of 284 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 727 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 728 ;
 729   %F32 = call float @llvm.sqrt.f32(float undef)
 730   %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
 731   %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 732   %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 733
 734   %F64 = call double @llvm.sqrt.f64(double undef)
 735   %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 736   %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 737   %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 738
 739   ret i32 undef
 740 }
 741
 742 define i32 @fabs(i32 %arg) {
 743 ; SSE1-LABEL: 'fabs'
 744 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 745 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 746 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 747 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 748 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 749 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 750 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 751 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 752 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 753 ;
 754 ; SSE2-LABEL: 'fabs'
 755 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 756 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 757 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 758 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 759 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 760 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 761 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 762 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 763 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 764 ;
 765 ; SSE42-LABEL: 'fabs'
 766 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 767 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 768 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 769 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 770 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 771 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 772 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 773 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 774 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 775 ;
 776 ; AVX-LABEL: 'fabs'
 777 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 778 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 779 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 780 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 781 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 782 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 783 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 784 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 785 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 786 ;
 787 ; AVX512-LABEL: 'fabs'
 788 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 789 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 790 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 791 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 792 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 793 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 794 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 795 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 796 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 797 ;
 798 ; SLM-LABEL: 'fabs'
 799 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 800 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 801 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 802 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 803 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 804 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 805 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 806 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 807 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 808 ;
 809 ; GLM-LABEL: 'fabs'
 810 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
 811 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 812 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 813 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 814 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
 815 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 816 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 817 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 818 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 819 ;
 820   %F32 = call float @llvm.fabs.f32(float undef)
 821   %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
 822   %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
 823   %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 824
 825   %F64 = call double @llvm.fabs.f64(double undef)
 826   %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
 827   %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
 828   %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
 829
 830   ret i32 undef
 831 }
 832
 833 define i32 @fcopysign(i32 %arg) {
 834 ; SSE1-LABEL: 'fcopysign'
 835 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 836 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 837 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 838 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 839 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 840 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 841 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 842 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 843 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 844 ;
 845 ; SSE2-LABEL: 'fcopysign'
 846 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 847 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 848 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 849 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 850 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 851 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 852 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 853 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 854 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 855 ;
 856 ; SSE42-LABEL: 'fcopysign'
 857 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 858 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 859 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 860 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 861 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 862 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 863 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 864 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 865 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 866 ;
 867 ; AVX-LABEL: 'fcopysign'
 868 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 869 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 870 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 871 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 872 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 873 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 874 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 875 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 876 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 877 ;
 878 ; AVX512-LABEL: 'fcopysign'
 879 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 880 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 881 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 882 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 883 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 884 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 885 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 886 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 887 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 888 ;
 889 ; SLM-LABEL: 'fcopysign'
 890 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 891 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 892 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 893 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 894 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 895 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 896 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 897 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 898 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 899 ;
 900 ; GLM-LABEL: 'fcopysign'
 901 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 902 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 903 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 904 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 905 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
 906 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 907 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 908 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 909 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 910 ;
 911   %F32 = call float @llvm.copysign.f32(float undef, float undef)
 912   %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
 913   %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
 914   %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
 915
 916   %F64 = call double @llvm.copysign.f64(double undef, double undef)
 917   %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
 918   %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
 919   %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
 920
 921   ret i32 undef
 922 }
 923
 924 define i32 @fma(i32 %arg) {
 925 ; SSE1-LABEL: 'fma'
 926 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 927 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 928 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 929 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 930 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 931 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 932 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 933 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 934 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 935 ;
 936 ; SSE2-LABEL: 'fma'
 937 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 938 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 939 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 940 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 941 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 942 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 943 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 944 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 945 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 946 ;
 947 ; SSE42-LABEL: 'fma'
 948 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 949 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 950 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 951 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 952 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 953 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 954 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 955 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 956 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 957 ;
 958 ; AVX-LABEL: 'fma'
 959 ; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 960 ; AVX-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 961 ; AVX-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 962 ; AVX-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 963 ; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 964 ; AVX-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 965 ; AVX-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 966 ; AVX-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 967 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 968 ;
 969 ; AVX512-LABEL: 'fma'
 970 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 971 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 972 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 973 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 974 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 975 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 976 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 977 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 978 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 979 ;
 980 ; SLM-LABEL: 'fma'
 981 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 982 ; SLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 983 ; SLM-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 984 ; SLM-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 985 ; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 986 ; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 987 ; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 988 ; SLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
 989 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 990 ;
 991 ; GLM-LABEL: 'fma'
 992 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 993 ; GLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 994 ; GLM-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
 995 ; GLM-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
 996 ; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 997 ; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 998 ; GLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
 999 ; GLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
1000 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1001 ;
1002   %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
1003   %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
1004   %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
1005   %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
1006
1007   %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
1008   %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
1009   %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
1010   %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
1011
1012   ret i32 undef
1013 }
1014
1015 define i32 @rint(i32 %arg) {
1016 ; SSE1-LABEL: 'rint'
1017 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1018 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1019 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1020 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1021 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1022 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1023 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1024 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1025 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1026 ;
1027 ; SSE2-LABEL: 'rint'
1028 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1029 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1030 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1031 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1032 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1033 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1034 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1035 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1036 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1037 ;
1038 ; SSE42-LABEL: 'rint'
1039 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1040 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1041 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1042 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1043 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1044 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1045 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1046 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1047 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1048 ;
1049 ; AVX-LABEL: 'rint'
1050 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1051 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1052 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1053 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1054 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1055 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1056 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1057 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1058 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1059 ;
1060 ; AVX512-LABEL: 'rint'
1061 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1062 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1063 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1064 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1065 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1066 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1067 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1068 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1069 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1070 ;
1071 ; SLM-LABEL: 'rint'
1072 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1073 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1074 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1075 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1076 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1077 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1078 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1079 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1080 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1081 ;
1082 ; GLM-LABEL: 'rint'
1083 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
1084 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
1085 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
1086 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
1087 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
1088 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
1089 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
1090 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
1091 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1092 ;
1093   %F32 = call float @llvm.rint.f32.f32(float undef)
1094   %V4F32 = call <4 x float> @llvm.rint.v4f32.v4f32(<4 x float> undef)
1095   %V8F32 = call <8 x float> @llvm.rint.v8f32.v8f32(<8 x float> undef)
1096   %V16F32 = call <16 x float> @llvm.rint.v16f32.v16f32(<16 x float> undef)
1097
1098   %F64 = call double @llvm.rint.f64.f64(double undef)
1099   %V2F64 = call <2 x double> @llvm.rint.v2f64.v2f64(<2 x double> undef)
1100   %V4F64 = call <4 x double> @llvm.rint.v4f64.v4f64(<4 x double> undef)
1101   %V8F64 = call <8 x double> @llvm.rint.v8f64.v8f64(<8 x double> undef)
1102
1103   ret i32 undef
1104 }
1105
1106 define i32 @lrint(i32 %arg) {
1107 ; SSE1-LABEL: 'lrint'
1108 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1109 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1110 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1111 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1112 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1113 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1114 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1115 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1116 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1117 ;
1118 ; SSE2-LABEL: 'lrint'
1119 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1120 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1121 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1122 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1123 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1124 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1125 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1126 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1127 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1128 ;
1129 ; SSE42-LABEL: 'lrint'
1130 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1131 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1132 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1133 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1134 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1135 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1136 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1137 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1138 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1139 ;
1140 ; AVX-LABEL: 'lrint'
1141 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1142 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1143 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1144 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1145 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1146 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1147 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1148 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1149 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1150 ;
1151 ; AVX512-LABEL: 'lrint'
1152 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1153 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1154 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1155 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1156 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1157 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1158 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1159 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1160 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1161 ;
1162 ; SLM-LABEL: 'lrint'
1163 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1164 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1165 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1166 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1167 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1168 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1169 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1170 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1171 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1172 ;
1173 ; GLM-LABEL: 'lrint'
1174 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1175 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1176 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1177 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1178 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1179 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1180 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1181 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1182 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1183 ;
1184   %F32 = call i32 @llvm.lrint.i32.f32(float undef)
1185   %V4F32 = call <4 x i32> @llvm.lrint.v4i32.v4f32(<4 x float> undef)
1186   %V8F32 = call <8 x i32> @llvm.lrint.v8i32.v8f32(<8 x float> undef)
1187   %V16F32 = call <16 x i32> @llvm.lrint.v16i32.v16f32(<16 x float> undef)
1188
1189   %F64 = call i32 @llvm.lrint.i32.f64(double undef)
1190   %V2F64 = call <2 x i32> @llvm.lrint.v2i32.v2f64(<2 x double> undef)
1191   %V4F64 = call <4 x i32> @llvm.lrint.v4i32.v4f64(<4 x double> undef)
1192   %V8F64 = call <8 x i32> @llvm.lrint.v8i32.v8f64(<8 x double> undef)
1193
1194   ret i32 undef
1195 }
1196
1197 define i32 @llrint(i32 %arg) {
1198 ; CHECK-LABEL: 'llrint'
1199 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call i64 @llvm.llrint.i64.f32(float undef)
1200 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
1201 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
1202 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
1203 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call i64 @llvm.llrint.i64.f64(double undef)
1204 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
1205 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
1206 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
1207 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
1208 ;
1209   %F32 = call i64 @llvm.llrint.i64.f32(float undef)
1210   %V4F32 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
1211   %V8F32 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
1212   %V16F32 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
1213
1214   %F64 = call i64 @llvm.llrint.i64.f64(double undef)
1215   %V2F64 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
1216   %V4F64 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
1217   %V8F64 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
1218
1219   ret i32 undef
1220 }
1221
1222 declare float @llvm.sqrt.f32(float)
1223 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
1224 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
1225 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
1226
1227 declare double @llvm.sqrt.f64(double)
1228 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
1229 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
1230 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
1231
1232 declare float @llvm.fabs.f32(float)
1233 declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
1234 declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
1235 declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
1236
1237 declare double @llvm.fabs.f64(double)
1238 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
1239 declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
1240 declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
1241
1242 declare float @llvm.copysign.f32(float, float)
1243 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
1244 declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
1245 declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
1246
1247 declare double @llvm.copysign.f64(double, double)
1248 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
1249 declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
1250 declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
1251
1252 declare float @llvm.fma.f32(float, float, float)
1253 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
1254 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
1255 declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
1256
1257 declare double @llvm.fma.f64(double, double, double)
1258 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
1259 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
1260 declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)